commit 5a2e32aaeb22c89d1311bfac7544bea9f69903a1 Author: Niklas Gollenstede Date: Mon Apr 14 11:20:52 2025 +0200 Handout diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d95f4fd --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.build* +/build* diff --git a/CPPLINT.cfg b/CPPLINT.cfg new file mode 100644 index 0000000..22e2e0f --- /dev/null +++ b/CPPLINT.cfg @@ -0,0 +1,16 @@ +# StuBS Coding Style Checker +# +# Wir orientieren uns grob an den Google C++ Style Guide ( http://google.github.io/styleguide/cppguide.html ) +# mit primär folgenden Änderungen/Anpassungen: +# +# - Tabs statt Leerzeichen. Spart Bytes ;) +# - Zeilenlänge ist 120 +# - Keine Angaben zum Copyright +# - Aufgrund des Aufgabenbuildsystems sind neue / leere Zeilen leider nicht immer vermeidbar +# +# Zum Prüfen empfiehlt sich beispielsweise das Pythonscript CPPLINT.py ( https://github.com/cpplint/cpplint ) +# welches mit dieser Konfigurationsdatei arbeiten kann. +# +set noparent +filter=-whitespace/tab,-legal/copyright,-runtime/int,-runtime/threadsafe_fn,-readability/todo,-build/include_subdir,-runtime/references,-build/include_what_you_use,-whitespace/blank_line,-build/include,-whitespace/end_of_line,-whitespace/indent +linelength=120 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3e79950 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright 1998-2002 Institut für Verteilte Systeme (IVS), Otto-von-Guericke-Universität Magdeburg +Copyright 2002-2019 Lehrstuhl für Informatik 4, Friedrich-Alexander-Universität Erlangen-Nürnberg +Copyright 2017- System- und Rechnerarchitektur (SRA), Leibniz Universität Hannover +Copyright 2021-2023 Operating System Group (OSG), Technische Universität Hamburg +Copyright 2024- Verlässliche Systemsoftware (VSS), Technische Universität Braunschweig + +Diese Vorlage dient als Grundlage für Lehrveranstaltungen und darf nicht ohne vorherige, schriftliche Erlaubnis der Urheberrechtsinhaber veröffentlicht oder weitergegeben werden. 
+Es ist erlaubt und wünschenswert, diese Vorlage als Inspiration für eigene Projekte zu verwenden, es wird allerdings erbeten, dass die Vorgabe nicht mit deiner Lösung veröffentlicht wird. +Wir, als Lehrende, möchten alle teilnehmenden Studierenden dazu ermutigen eine eigene Lösung zu erstellen; eine veröffentlichte Lösung ist ein Anreiz zum Abschreiben, den wir gerne vermeiden möchten. + +This skeleton is provided as a foundation for educational purposes and therefore MUST NOT BE DISTRIBUTED OR PUBLISHED without prior, written consent of the copyright holders. +You are free to use this skeleton as inspiration for your projects, but, please, do not publish it along with your solution. +We, as lecturers, want to encourage every participating student to write a solution themself; a public solution is an allurement to copying we want to avoid. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..66ce818 --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +# Kernel Makefile +# try `make help` for more information +echo=$(shell which echo) -e + +# Default target +.DEFAULT_GOAL = all + +# Path to the files for the initial ramdisk (for Assignment 7) +INITRD_DIR ?= initrd/ +INITRD_TOOL ?= fs/tool/fstool +INITRD_DEP = +# 1MB free space +INITRD_FREE ?= 1048576 + +# Kernel source files +LINKER_SCRIPT = compiler/sections.ld +CC_SOURCES = $(shell find * -name "*.cc" -a ! -name '.*' -a ! -path 'test*' -a ! -path 'fs/tool/*' -a ! -path 'assets/*' -a ! -path 'tools/*') +ASM_SOURCES = $(shell find * -name "*.asm" -a ! 
-name '.*') + +# Target files +KERNEL = $(BUILDDIR)/system +KERNEL64 = $(KERNEL)64 +ISOFILE = $(BUILDDIR)/stubs.iso +KERNEL_LINK = $(ROOTBUILDDIR)/system.img + +# Include global variables and standard recipes +include tools/common.mk + +# Initial Ramdisk +ifneq ($(wildcard $(INITRD_DIR)*),) +INITRD = $(BUILDDIR)/initrd.img +INITRD_DEP += $(shell find $(INITRD_DIR) -type f ) +# Additional dependency for kernel +$(KERNEL): $(INITRD) +endif + +all: $(KERNEL) + +# Linking the system image +# We use the C++ compiler (which calls the actual linker) +$(KERNEL64): $(ASM_OBJECTS) $(CC_OBJECTS) $(LINKER_SCRIPT) $(MAKEFILE_LIST) + @echo "LD $@" + @mkdir -p $(@D) + $(VERBOSE) $(CXX) $(CXXFLAGS) -Wl,-T $(LINKER_SCRIPT) -o $@ $(LDFLAGS) $(ASM_OBJECTS) $(CC_OBJECTS) + @echo "LN $(KERNEL_LINK)" + $(VERBOSE) ln -sf $(@:$(ROOTBUILDDIR)/%=%) "$(KERNEL_LINK)" + +# The kernel must be a 32bit elf for multiboot compliance +$(KERNEL): $(KERNEL64) + $(VERBOSE) $(OBJCOPY) -I elf64-x86-64 -O elf32-i386 $< $@ + +# Tool for editing a Minix v3 file system image (Assignment 7) +$(INITRD_TOOL): $(shell test -d $(dir $(INITRD_TOOL)) && find $(dir $(INITRD_TOOL)) -name "*.cc" -or -name '*.h') + @echo "Make $@" + @make -C $(dir $(INITRD_TOOL)) + +# Initial Ramdisk with Minix v3 file system +$(INITRD): $(INITRD_TOOL) $(INITRD_DEP) + @echo "INITRD $@" + @dd if=/dev/zero of=$@ bs=$(shell du -s $(INITRD_DIR) | cut -f1 | xargs expr $(INITRD_FREE) + ) count=1 + @mkfs.minix -3 $@ # optional --inodes + @./$(INITRD_TOOL) put "$(INITRD_DIR)" $@ diff --git a/README.md b/README.md new file mode 100644 index 0000000..6b362ac --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +MPStuBS - Multiprozessor Studenten Betriebssystem +================================================= + +Coding Guidelines +----------------- + +Similar to [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) but with following exceptions: + - No license boilerplate + - *Tabs* instead of *Spaces* + - Line length of 120 
characters + - `#pragma once` instead of `#include` guards + +The code should be *self-documenting*, don't state the obvious! +However, this does not make comments superfluous: +Since good naming is sometimes not enough, more advanced parts need to be documented, +so any operating system developer should be able to easily understand your code. + +### Naming Convention + + - **Variables**: lowercase with underscore + + char* variable_name; + + - **Constants** (and **enum** values): uppercase with underscore + + const int CONST_VALUE = 42; + + - **Type Names** (`class`/`struct`/`namespace`/`enum`): Capital letter, camel case + + class SomeClassName; + + - **Methods/Functions** (C++): start with lowercase letter, then camel case + + void someFunctionName(); + + - **extern "C" Functions**: lowercase with underscore (like variables). + + void interrupt_handler(int vector); + + - **File Names**: lowercase, main type name, underscores only if it is a sub type + + folder/classname.cc diff --git a/arch/acpi.cc b/arch/acpi.cc new file mode 100644 index 0000000..a149bfb --- /dev/null +++ b/arch/acpi.cc @@ -0,0 +1,144 @@ +#include "acpi.h" + +#include "../debug/output.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Warray-bounds" + +namespace ACPI { + +static RSDP *rsdp = 0; +static RSDT *rsdt = 0; +static XSDT *xsdt = 0; + +const char *RSDP_SIGNATURE = "RSD PTR "; + +static int checksum(const void *pos, unsigned len) { + const uint8_t *mem = reinterpret_cast(pos); + uint8_t sum = 0; + for (unsigned i = 0; i < len; i++) { + sum += mem[i]; + } + + return sum; +} + +static const RSDP *findRSDP(const void *pos, unsigned len) { + /* since the RSDP is 16-Byte aligned, we only need to check + every second 64bit memory block */ + for (unsigned block = 0; block < len / 8; block += 2) { + const uint64_t *mem = reinterpret_cast(pos) + block; + if (*mem == *reinterpret_cast(RSDP_SIGNATURE)) { + const RSDP *rsdp = reinterpret_cast(mem); + /* ACPI Specification Revision 
4.0a: 5.2.5.3*/ + if ((rsdp->revision == 0 && checksum(mem, 20) == 0) || + (rsdp->length > 20 && checksum(mem, rsdp->length) == 0)) { + return rsdp; + } + } + } + return 0; +} + +bool init() { + /* ACPI Specification Revision 4.0a: + * 5.2.5.1 Finding the RSDP on IA-PC Systems + * OSPM finds the Root System Description Pointer (RSDP) structure by + * searching physical memory ranges on 16-byte boundaries for a valid + * Root System Description Pointer structure signature and checksum + * match as follows: + * * The first 1 KB of the Extended BIOS Data Area (EBDA). For EISA or + * MCA systems, the EBDA can be found in the two-byte location 40:0Eh + * on the BIOS data area. + * * The BIOS read-only memory space between 0E0000h and 0FFFFFh. + */ + const uintptr_t ebda = + static_cast(*reinterpret_cast(0x40e)); + const RSDP *rsdp = findRSDP(reinterpret_cast(ebda), 1024); + if (rsdp == nullptr) { + rsdp = findRSDP(reinterpret_cast(0xe0000), 0xfffff - 0xe0000); + } + + if (rsdp == nullptr) { + DBG_VERBOSE << "No ACPI!" << endl; + return false; + } + rsdt = reinterpret_cast(static_cast(rsdp->rsdtaddress)); + + /* If the XSDT is present we must use it; see: + * ACPI Specification Revision 4.0a: + * "An ACPI-compatible OS must use the XSDT if present." + */ + if (rsdp->revision != 0 && rsdp->length >= 36) { + xsdt = reinterpret_cast(rsdp->xsdtaddress); + } + DBG_VERBOSE << "ACPI revision " << rsdp->revision << endl; + for (unsigned i = 0; i != count(); ++i) { + SDTH *sdt = get(i); + if (sdt != nullptr) { + char *c = reinterpret_cast(&sdt->signature); + DBG_VERBOSE << i << ". 
" << c[0] << c[1] << c[2] << c[3] << " @ " + << reinterpret_cast(sdt) << endl; + } + } + return true; +} + +unsigned count() { + if (xsdt != nullptr) { + return (xsdt->length - 36) / 8; + } else if (rsdt != nullptr) { + return (rsdt->length - 36) / 4; + } else { + return 0; + } +} + +SDTH *get(unsigned num) { + if (xsdt != nullptr) { + SDTH *entry = reinterpret_cast(xsdt->entries[num]); + if (checksum(entry, entry->length) == 0) { + return entry; + } + } else if (rsdt != nullptr) { + SDTH *entry = + reinterpret_cast(static_cast(rsdt->entries[num])); + if (checksum(entry, entry->length) == 0) { + return entry; + } + } + return 0; +} + +SDTH *get(char a, char b, char c, char d) { + union { + char signature[4]; + uint32_t value; + }; + signature[0] = a; + signature[1] = b; + signature[2] = c; + signature[3] = d; + + if (xsdt != nullptr) { + for (unsigned i = 0; i < count(); i++) { + SDTH *entry = reinterpret_cast(xsdt->entries[i]); + if (entry->signature == value && checksum(entry, entry->length) == 0) { + return entry; + } + } + } else if (rsdt != nullptr) { + for (unsigned i = 0; i < count(); i++) { + SDTH *entry = + reinterpret_cast(static_cast(rsdt->entries[i])); + if (entry->signature == value && checksum(entry, entry->length) == 0) { + return entry; + } + } + } + return 0; +} + +int revision() { return rsdp != nullptr ? rsdp->revision : -1; } + +} // namespace ACPI diff --git a/arch/acpi.h b/arch/acpi.h new file mode 100644 index 0000000..a91580d --- /dev/null +++ b/arch/acpi.h @@ -0,0 +1,270 @@ +/*! \file + * \brief Structs and methods related to the \ref ACPI "Advanced Configuration + * and Power Interface (ACPI)"" + */ + +#pragma once +#include "../types.h" + +/*! \brief Abstracts the ACPI standard that provides interfaces for hardware + * detection, device configuration, and energy management. + * \ingroup io + * + * ACPI is the successor to APM (Advanced Power Management), aiming to give the + * operating system more control over the hardware. 
This extended control, for + * instance, enables the operating system to assign a particular amount of + * energy to every device (e.g., by disabling a device or changing to standby + * mode). For this purpose, BIOS and chipset provide a set of tables that + * describe the system and its components and provide routines the OS can call. + * These tables contain details about the system, such as the number of CPU + * cores and the LAPIC/IOAPIC, which are determined during system boot. + */ + +namespace ACPI { + +/*! \brief Root System Description Pointer (RSDP) + * + * The first step to using ACPI is finding the RSDP that is used to find the + * RSDT / XSDT, which themselves contain pointers to even more tables. + * + * On UEFI systems, the RSDP can be found in the EFI_SYSTEM_TABLE; for non-UEFI + * systems we have to search for the signature 'RSD PTR ' in the EBDA (Extended + * Bios Data Area) or in the memory area up to `FFFFFh`. + * + * \see [ACPI-Specification 5.2.5.3; Root System Description Pointer (RSDP) + * Structure](acpi.pdf#page=161) + */ + +struct RSDP { + char signature[8]; /* must exactly be equal to 'RSD PTR ' */ + uint8_t checksum; + char oemid[6]; + uint8_t revision; /* specifies the ACPI version */ + uint32_t rsdtaddress; /* physical address of the RSDT */ + uint32_t length; + uint64_t xsdtaddress; /* physical address of the XSDT */ + uint8_t extended_checksum; + uint8_t reserved[3]; +} __attribute__((packed)); + +/*! \brief System Description Table Header (SDTH) + * + * All System Description Tables (e.g., the RSDT) contain the same entries at + * the very beginning of the structure, which are abstracted in the SDTH. 
+ * + * \see [ACPI-Specification 5.2.6; System Description Table + * Header](acpi.pdf#page=162) + */ +struct SDTH { + uint32_t signature; /* table id */ + uint32_t length; + uint8_t revision; + uint8_t checksum; + char oemid[6]; + char oem_table_id[8]; + uint32_t oem_revision; + uint32_t creator_id; + uint32_t creator_revision; + + /* \brief Helper method + * \return Pointer to the end of the table + */ + void *end() { return reinterpret_cast(this) + length; } +} __attribute__((packed)); + +/*! \brief Root System Description Table (RSDT) + * + * The RSDT can be found in the RSDP. The RSDT contains physical addresses of + * all other System Description Tables, for example the MADT. + * + * \see [ACPI-Specification 5.2.7; Root System Description Table + * (RSDT)](acpi.pdf#page=167) + */ + +struct RSDT : SDTH { + uint32_t entries[]; +} __attribute__((packed)); + +/*! \brief Extended System Description Table (XSDT) + * + * Like RSDT, but contains 64-bit instead of 32-bit addresses. + * + * \see [ACPI-Specification 5.2.8; Extended System Description Table + * (XSDT)](acpi.pdf#page=168) + */ + +struct XSDT : SDTH { + uint64_t entries[]; +} __attribute__((packed)); + +/*! \brief Helper structure + * + * Is used for accessing the substructures present in SRAT / MADT. + * + */ +struct SubHeader { + uint8_t type; + uint8_t length; + + /* Method to traverse multiple substructures */ + SubHeader *next() { + return reinterpret_cast(reinterpret_cast(this) + + length); + } +} __attribute__((packed)); + +/*! \brief Multiple APIC Description Table (MADT) + * + * Describes all interrupt controllers present within the system. Is used to + * obtain the IDs of the APICs, along with the number of available processor + * cores. 
+ * + * \see [ACPI-Specification 5.2.12; Multiple APIC Description Table + * (MADT)](acpi.pdf#page=193) + */ +struct MADT : SDTH { + uint32_t local_apic_address; + uint32_t flags_pcat_compat : 1, flags_reserved : 31; + + /* method to access the first subheader */ + SubHeader *first() { + return reinterpret_cast(reinterpret_cast(this) + + sizeof(MADT)); + } +} __attribute__((packed)); + +enum class AddressSpace : uint8_t { + MEMORY = 0x0, + IO = 0x1, +}; + +/*! \brief ACPI address format + * + * The ACPI standard defines its own address format that is able to handle + * addresses both in memory address space, as well as IO-port address space. + */ +struct Address { + AddressSpace address_space; + uint8_t register_bit_width; + uint8_t register_bit_offset; + uint8_t reserved; + uint64_t address; +} __attribute__((packed)); + +// Multiple APIC Definition Structure +namespace MADS { +enum Type { + Type_LAPIC = 0, + Type_IOAPIC = 1, + Type_Interrupt_Source_Override = 2, + Type_LAPIC_Address_Override = 5, +}; + +/*! \brief Processor Local APIC (LAPIC) Structure + * + * Represents a physical processor along with its local interrupt controller. + * The MADT contains a LAPIC structure for every processor available in the + * system. + * + * \see [ACPI-Specification 5.2.12.2; Processor Local APIC + * Structure](acpi.pdf#page=195) + */ +struct LAPIC : SubHeader { + uint8_t acpi_processor_id; + uint8_t apic_id; + uint32_t flags_enabled : 1, flags_reserved : 31; /* must be 0 */ +} __attribute__((packed)); + +/*! \brief I/O APIC Structure + * + * Represents an I/O-APIC. + * The MADT contains an IOAPIC structure for every I/O APIC present in the + * system. + * + * \see [ACPI-Specification 5.2.12.3; I/O APIC Structure](acpi.pdf#page=196) + */ + +struct IOAPIC : SubHeader { + uint8_t ioapic_id; + uint8_t reserved; + uint32_t ioapic_address; + uint32_t global_system_interrupt_base; +} __attribute__((packed)); + +/*! 
\brief Interrupt Source Override Structure + * + * Is required to describe differences between the IA-PC standard interrupt + * definition and the actual hardware implementation. + * + * \see [ACPI-Specification 5.2.12.5; Interrupt Source Override + * Structure](acpi.pdf#page=197) + */ +struct Interrupt_Source_Override : SubHeader { + uint8_t bus; + uint8_t source; + uint32_t global_system_interrupt; + uint16_t flags_polarity : 2, flags_trigger_mode : 2, + flags_reserved : 12; /* must be 0 */ +} __attribute__((packed)); + +/*! \brief Local APIC Address Override Structure + * + * Support for 64-bit systems is achieved by replacing the 32-bit physical LAPIC + * address stored in the MADT with the corresponding 64-bit address. + * + * \see [ACPI-Specification 5.2.12.8; Local APIC Address Override + * Structure](acpi.pdf#page=199) + */ + +struct LAPIC_Address_Override : SubHeader { + uint16_t reserved; + union { + uint64_t lapic_address; + struct { + uint32_t lapic_address_low; + uint32_t lapic_address_high; + } __attribute__((packed)); + }; +} __attribute__((packed)); + +} // namespace MADS + +/*! \brief Initialize the ACPI description table + * + * Searches physical memory ranges on 16-byte boundaries for a valid Root System + * Description Pointer (RSDP) structure signature and checksum. If present, the + * superseding Extended System Description Table (XSDT) is used. + * + * \see [ACPI-Specification 5.2.5 Root System Description Pointer + * (RSDP)](acpi.pdf#page=160) + * \see [ACPI-Specification 5.2.8 Extended System Description Table + * (XSDT)](acpi.pdf#page=168) + */ +bool init(); + +/*! \brief Number of entries in the description table + */ +unsigned count(); + +/*! \brief Get entry of description table by index + * + * \param num index in description table + * \return Pointer to corresponding entry or `nullptr` if not available + */ +SDTH *get(unsigned num); + +/*! 
\brief Get entry of description table by four character identifier + * + * \param a first character of identifier + * \param b second character of identifier + * \param c third character of identifier + * \param d fourth and last character of identifier + * \return Pointer to corresponding entry or `nullptr` if not available + */ +SDTH *get(char a, char b, char c, char d); + +/*! \brief Retrieve the revision from the Root System Description Pointer (RSDP) + */ +int revision(); + +} // namespace ACPI diff --git a/arch/apic.cc b/arch/apic.cc new file mode 100644 index 0000000..ef421e7 --- /dev/null +++ b/arch/apic.cc @@ -0,0 +1,150 @@ +#include "apic.h" + +#include "../debug/assert.h" +#include "../debug/output.h" +#include "acpi.h" +#include "core.h" +#include "ioport.h" +#include "lapic_registers.h" + +namespace APIC { + +static struct { + uint32_t id; + uintptr_t address; + uint32_t interrupt_base; +} ioapic; + +static uint8_t slot_map[16]; + +static uint8_t lapic_id[Core::MAX]; +static unsigned lapics = 0; + +bool init() { + // get Multiple APIC Description Table (MADT) from ACPI + ACPI::MADT* madt = static_cast(ACPI::get('A', 'P', 'I', 'C')); + if (madt == 0) { + DBG_VERBOSE << "ERROR: no MADT found in ACPI" << endl; + return false; + } + + // read the local APIC address + LAPIC::base_address = static_cast(madt->local_apic_address); + DBG_VERBOSE << "LAPIC Address " + << reinterpret_cast( + static_cast(madt->local_apic_address)) + << endl; + + // PC/AT compatibility mode + if (madt->flags_pcat_compat != 0) { + // The APIC operating mode is set to compatible PIC mode - we have to change + // it. + DBG_VERBOSE << "PIC comp mode, disabling PICs." 
<< endl; + + // Select Interrupt Mode Control Register (IMCR) + // (this register will only exist if hardware supports the PIC mode) + IOPort reg(0x22); + reg.outb(0x70); + // disable PIC mode, use APIC + IOPort imcr(0x23); + imcr.outb(0x01); + } + + // Set default mapping of external interrupt slots (might be overwritten + // below) + for (unsigned i = 0; i < sizeof(slot_map) / sizeof(slot_map[0]); i++) { + slot_map[i] = i; + } + + // Initialize invalid lapic_ids + for (unsigned i = 0; i < Core::MAX; i++) { + lapic_id[i] = INVALID_ID; + } + + // reset numbers, store apic data into arrays + for (ACPI::SubHeader* mads = madt->first(); mads < madt->end(); + mads = mads->next()) { + switch (mads->type) { + case ACPI::MADS::Type_LAPIC: { + ACPI::MADS::LAPIC* mads_lapic = static_cast(mads); + if (mads_lapic->flags_enabled == 0) { + DBG_VERBOSE << "Detected disabled LAPIC with ID " + << static_cast(mads_lapic->apic_id) << endl; + } else if (lapics >= Core::MAX) { + DBG_VERBOSE << "Got more LAPICs than Core::MAX" << endl; + } else if (mads_lapic->apic_id == INVALID_ID) { + DBG_VERBOSE << "Got invalid APIC ID" << endl; + } else { + DBG_VERBOSE << "Detected LAPIC with ID " + << static_cast(mads_lapic->apic_id) << endl; + lapic_id[lapics++] = mads_lapic->apic_id; + } + break; + } + case ACPI::MADS::Type_IOAPIC: { + ACPI::MADS::IOAPIC* mads_ioapic = + static_cast(mads); + DBG_VERBOSE << "Detected IO APIC with ID " + << static_cast(mads_ioapic->ioapic_id) + << " / Base " + << reinterpret_cast(static_cast( + mads_ioapic->global_system_interrupt_base)) + << endl; + if (mads_ioapic->global_system_interrupt_base > 23) { + DBG_VERBOSE << "Ignoring IOAPIC since we currently only support one." 
+ << endl; + } else { + ioapic.id = mads_ioapic->ioapic_id; + ioapic.address = static_cast(mads_ioapic->ioapic_address); + ioapic.interrupt_base = mads_ioapic->global_system_interrupt_base; + } + break; + } + case ACPI::MADS::Type_Interrupt_Source_Override: { + ACPI::MADS::Interrupt_Source_Override* mads_iso = + static_cast(mads); + if (mads_iso->bus == 0) { + DBG_VERBOSE << "Overriding Interrupt Source " + << static_cast(mads_iso->source) << " with " + << mads_iso->global_system_interrupt << endl; + if (mads_iso->source < sizeof(slot_map) / sizeof(slot_map[0])) { + slot_map[mads_iso->source] = mads_iso->global_system_interrupt; + } + } else { + DBG_VERBOSE << "Override for bus " << mads_iso->bus + << " != ISA. Does not conform to ACPI." << endl; + } + break; + } + case ACPI::MADS::Type_LAPIC_Address_Override: { + ACPI::MADS::LAPIC_Address_Override* mads_lao = + static_cast(mads); + LAPIC::base_address = + static_cast(mads_lao->lapic_address_low); + DBG_VERBOSE << "Overriding LAPIC address with " + << reinterpret_cast( + static_cast(mads_lao->lapic_address)) + << endl; + break; + } + } + } + return true; +} + +uint8_t getIOAPICSlot(APIC::Device device) { return slot_map[device]; } + +uintptr_t getIOAPICAddress() { return ioapic.address; } + +uint8_t getIOAPICID() { return ioapic.id; } + +uint8_t getLogicalAPICID(uint8_t core) { + return core < Core::MAX ? (1 << core) : 0; +} + +uint8_t getLAPICID(uint8_t core) { + assert(core < Core::MAX); + return lapic_id[core]; +} + +} // namespace APIC diff --git a/arch/apic.h b/arch/apic.h new file mode 100644 index 0000000..b9e4ec1 --- /dev/null +++ b/arch/apic.h @@ -0,0 +1,82 @@ +/*! \file + * \brief Gather system information from the \ref ACPI about the \ref APIC + * "Advanced Programmable Interrupt Controller (APIC)" + */ + +#pragma once +#include "../types.h" + +/*! \brief Information about the (extended) Advanced Programmable Interrupt + * Controller + */ +namespace APIC { +/*! 
\brief Historic order of interrupt lines (PIC) + */ +enum Device { + TIMER = 0, ///< Programmable Interrupt Timer (\ref PIT) + KEYBOARD = 1, ///< Keyboard + COM1 = 4, ///< First serial interface + COM2 = 3, ///< Second serial interface + COM3 = 4, ///< Third serial interface (shared with COM1) + COM4 = 3, ///< Fourth serial interface (shared with COM2) + FLOPPY = 6, ///< Floppy device + LPT1 = 7, ///< Printer + REALTIMECLOCK = 8, ///< Real time clock + PS2MOUSE = 12, ///< Mouse + IDE1 = 14, ///< First hard disk + IDE2 = 15 ///< Second hard disk +}; + +/*! \brief Invalid APIC ID + * + * The highest address is reserved according to xAPIC specification + */ +const uint8_t INVALID_ID = 0xff; + +/*! \brief Executes system detection + * + * Searches and evaluates the APIC entries in the \ref ACPI table. + * This function recognizes a possibly existing multicore system. + * After successful detection, the number of available CPUs (which is equal + * to the number of \ref LAPIC "local APICs") can be queried + * using the method \ref Core::count(). + * + * \note Called by \ref kernel_init() on BSP + * + * \return `true` if detection of the APIC entries was successful + */ +bool init(); + +/*! \brief Queries the I/O-APIC address determined during system boot + * + * \return Base address of the (first & only supported) I/O APIC + */ +uintptr_t getIOAPICAddress(); + +/*! \brief Queries the ID of the I/O-APIC determined during system boot + * + * \return Identification of the (first & only supported) I/O APIC + */ +uint8_t getIOAPICID(); + +/*! \brief Returns the pin number the \p device is connected to. + */ +uint8_t getIOAPICSlot(APIC::Device device); + +/*! \brief Returns the logical ID of the Local APIC passed for \a core. + * + * The LAPIC's logical ID is set (by StuBS) during boot such that exactly one + * bit is set per CPU core. For core 0, bit 0 is set in its ID, while core 1 has + * bit 1 set, etc. 
+ * + * \param core The queried CPU core + */ +uint8_t getLogicalAPICID(uint8_t core); + +/*! \brief Get the Local APIC ID of a CPU + * \param core Query CPU core number + * \return LAPIC ID of CPU or INVALID_ID if invalid CPU ID + */ +uint8_t getLAPICID(uint8_t core); + +} // namespace APIC diff --git a/arch/cache.h b/arch/cache.h new file mode 100644 index 0000000..fe76c8c --- /dev/null +++ b/arch/cache.h @@ -0,0 +1,22 @@ +/*! \file + * \brief Helper for cache alignment + */ + +#pragma once + +#include "../debug/assert.h" + +// Helper for aligning to cache line (to prevent false sharing) +#ifndef CACHE_LINE_SIZE +#define CACHE_LINE_SIZE 64 +#endif +#define cache_aligned alignas(CACHE_LINE_SIZE) + +/*! + * \def assert_cache_aligned(TYPE) + * \brief Compile time check of cache alignment + * \param TYPE data type to check + */ +#define assert_cache_aligned(TYPE) \ + static_assert(sizeof(TYPE) % CACHE_LINE_SIZE == 0, \ + STRINGIFY(TYPE) "Not aligned on cache boundary") diff --git a/arch/cga.cc b/arch/cga.cc new file mode 100644 index 0000000..13b10a6 --- /dev/null +++ b/arch/cga.cc @@ -0,0 +1,21 @@ +#include "cga.h" + +namespace CGA { + +void setCursor(unsigned abs_x, unsigned abs_y) { + (void)abs_x; + (void)abs_y; +} + +void getCursor(unsigned& abs_x, unsigned& abs_y) { + (void)abs_x; + (void)abs_y; +} + +void show(unsigned abs_x, unsigned abs_y, char character, Attribute attrib) { + (void)abs_x; + (void)abs_y; + (void)character; + (void)attrib; +} +}; // namespace CGA diff --git a/arch/cga.h b/arch/cga.h new file mode 100644 index 0000000..33b61b3 --- /dev/null +++ b/arch/cga.h @@ -0,0 +1,138 @@ +/*! \file + * \brief \ref CGA provides a basic interface to display a character in + * VGA-compatible text mode + */ + +#pragma once +#include "../types.h" + +/*! 
\brief Basic operations in the VGA-compatible text mode + * \ingroup io + * + * This namespace provides an interface to access the screen in text mode + * (also known as CGA mode), with access directly on the hardware + * level, i.e. the video memory and the I/O ports of the graphics + * card. + */ +namespace CGA { +constexpr unsigned ROWS = 25; ///< Visible rows in text mode +constexpr unsigned COLUMNS = 80; ///< Visible columns in text mode + +/*! \brief CGA color palette + * + * Colors for the attribute byte. + * All 16 colors can be used for the foreground while the background colors + * are limited to the first eight (from `BLACK` to `LIGHT_GREY`) + */ +enum Color { + BLACK, ///< Black (fore- and background) + BLUE, ///< Blue (fore- and background) + GREEN, ///< Green (fore- and background) + CYAN, ///< Cyan (fore- and background) + RED, ///< Red (fore- and background) + MAGENTA, ///< Magenta (fore- and background) + BROWN, ///< Brown (fore- and background) + LIGHT_GREY, ///< Light grey (fore- and background) + DARK_GREY, ///< Dark grey (foreground only) + LIGHT_BLUE, ///< Light blue (foreground only) + LIGHT_GREEN, ///< Light green (foreground only) + LIGHT_CYAN, ///< Light cyan (foreground only) + LIGHT_RED, ///< Light red (foreground only) + LIGHT_MAGENTA, ///< Light magenta (foreground only) + YELLOW, ///< Yellow (foreground only) + WHITE ///< White (foreground only) +}; + +/*! \brief Structure of a character attribute + * consists of 4 bit fore- and 3 bit background color, and a single blink bit. + * + * [Bit fields](https://en.cppreference.com/w/cpp/language/bit_field) can + * notably simplify the access and code readability. + * + * \note [Type punning](https://en.wikipedia.org/wiki/Type_punning#Use_of_union) + * is indeed undefined behavior in C++. However, *gcc* explicitly allows + * this construct as a [language extension](https://gcc.gnu.org/bugs/#nonbugs). 
+ * Some compilers ([other than + * gcc](https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#Type%2Dpunning) + * might allow this feature only by disabling strict aliasing + * (`-fno-strict-aliasing`). In \StuBS we use this feature extensively due to + * the improved code readability. + * + * \todo(11) Fill in the bitfield + */ +union Attribute { + struct { + uint8_t todo : 8; + } __attribute__((packed)); + uint8_t value; ///< combined value + + /*! \brief Attribute constructor (with default values) + * + * \todo(11) Complete constructor + * + * \param foreground Foreground color (Default: \ref LIGHT_GREY) + * \param background Background color (Default: \ref BLACK) + * \param blink Blink if `true` (default: no blinking) + */ + explicit Attribute(Color foreground = LIGHT_GREY, Color background = BLACK, + bool blink = false) { // NOLINT + (void)foreground; + (void)background; + (void)blink; + } +} __attribute__((packed)); // prevent padding by the compiler + +/*! \brief Set the keyboard hardware cursor to absolute screen position + * + * \todo(11) Implement the method using \ref IOPort + * + * \param abs_x absolute column of the keyboard hardware cursor + * \param abs_y absolute row of the keyboard hardware cursor + */ +void setCursor(unsigned abs_x, unsigned abs_y); + +/*! \brief Retrieve the keyboard hardware cursor position on screen + * + * \todo(11) Implement the method using the \ref IOPort + * + * \param abs_x absolute column of the keyboard hardware cursor + * \param abs_y absolute row of the keyboard hardware cursor + */ +void getCursor(unsigned& abs_x, unsigned& abs_y); + +/*! \brief Basic output of a character at a specific position on the screen. + * + * This method outputs the given character at the absolute screen position + * (`x`, `y`) with the specified color attribute. + * + * The position (`0`,`0`) indicates the upper left corner of the screen. 
+ * The attribute defines characteristics such as background color, + * foreground color and blinking. + * + * \param abs_x Column (`abs_x` < \ref COLUMNS) in which the character should be + * displayed + * \param abs_y Row (`abs_y` < \ref ROWS) in which the character should be + * displayed + * \param character Character to be displayed + * \param attrib Attribute with color settings + * + * \todo(11) Implement the method + */ +void show(unsigned abs_x, unsigned abs_y, char character, + Attribute attrib = Attribute()); + +/*! \brief Structure for a cell in text mode + * + * Consisting of two bytes, character and attribute + */ +struct Cell { + char character; + Attribute attribute; + Cell(char character, Attribute attribute) + : character(character), attribute(attribute) {} +} __attribute__((packed)); + +/*! \brief Base address for linear text buffer in video memory + */ +Cell* const TEXT_BUFFER_BASE = nullptr; +}; // namespace CGA diff --git a/arch/cmos.cc b/arch/cmos.cc new file mode 100644 index 0000000..3d957f5 --- /dev/null +++ b/arch/cmos.cc @@ -0,0 +1,61 @@ +#include "cmos.h" + +#include "core_interrupt.h" +#include "ioport.h" + +namespace CMOS { +static IOPort address(0x70); +static IOPort data(0x71); + +namespace NMI { +static const uint8_t mask = 0x80; +// Cache NMI to speed things up +static bool disabled = false; + +void enable() { + bool status = Core::Interrupt::disable(); + uint8_t value = address.inb(); + value &= ~mask; + address.outb(value); + Core::Interrupt::restore(status); + disabled = false; +} + +void disable() { + bool status = Core::Interrupt::disable(); + uint8_t value = address.inb(); + value |= mask; + address.outb(value); + Core::Interrupt::restore(status); + disabled = true; +} + +bool isEnabled() { + disabled = (address.inb() & mask) != 0; + return !disabled; +} +} // namespace NMI + +static void setAddress(enum Register reg) { + uint8_t value = reg; + // The highest bit controls the Non Maskable Interrupt + // so we don't want to 
accidentally change it. + if (NMI::disabled) { + value |= NMI::mask; + } else { + value &= ~NMI::mask; + } + address.outb(value); +} + +uint8_t read(enum Register reg) { + setAddress(reg); + return data.inb(); +} + +void write(enum Register reg, uint8_t value) { + setAddress(reg); + data.outb(value); +} + +} // namespace CMOS diff --git a/arch/cmos.h b/arch/cmos.h new file mode 100644 index 0000000..17a801c --- /dev/null +++ b/arch/cmos.h @@ -0,0 +1,46 @@ +/*! \file + * \brief Controlling the \ref CMOS "complementary metal oxide semiconductor + * (CMOS)" + */ + +#pragma once +#include "../types.h" + +/*! + * \defgroup CMOS CMOS + * \brief complementary metal oxide semiconductor (CMOS) + */ + +/*! \brief CMOS + * \ingroup CMOS + */ +namespace CMOS { + +enum Register { + REG_SECOND = 0x0, ///< RTC + REG_ALARM_SECOND = 0x1, ///< RTC + REG_MINUTE = 0x2, ///< RTC + REG_ALARM_MINUTE = 0x3, ///< RTC + REG_HOUR = 0x4, ///< RTC + REG_ALARM_HOUR = 0x5, ///< RTC + REG_WEEKDAY = 0x6, ///< RTC + REG_DAYOFMONTH = 0x7, ///< RTC + REG_MONTH = 0x8, ///< RTC + REG_YEAR = 0x9, ///< RTC + REG_STATUS_A = 0xa, ///< RTC + REG_STATUS_B = 0xb, ///< RTC + REG_STATUS_C = 0xc, ///< RTC + REG_STATUS_D = 0xd, ///< RTC + REG_STATUS_DIAGNOSE = 0xe, + REG_STATUS_SHUTDOWN = 0xf +}; + +uint8_t read(enum Register reg); +void write(enum Register reg, uint8_t value); + +namespace NMI { +void enable(); +void disable(); +bool isEnabled(); +} // namespace NMI +} // namespace CMOS diff --git a/arch/context.asm b/arch/context.asm new file mode 100644 index 0000000..9721de1 --- /dev/null +++ b/arch/context.asm @@ -0,0 +1,20 @@ +[SECTION .text] +[GLOBAL context_switch] +[GLOBAL context_launch] +[GLOBAL fake_systemv_abi] + +; context_switch saves the registers in the current context structure +; and populates the registers from the next context. +align 16 +context_switch: + +; context_launch populates the register set from the next context structure. +; It does not save the current registers.
+align 16 ; When only one parameter is used for `align`, it will use NOP +context_launch: + +; fake_systemv_abi is used to populate the volatile argument registers used by the systemv abi (rdi, rsi, ...) +; with values from the non-volatile registers saved within the thread context (r15, r14, ...) +align 16 +fake_systemv_abi: + diff --git a/arch/context.cc b/arch/context.cc new file mode 100644 index 0000000..8026b41 --- /dev/null +++ b/arch/context.cc @@ -0,0 +1,9 @@ +#include "context.h" + +void prepareContext(void* tos, Context& context, void (*kickoff)(void*), + void* param1) { + (void)tos; + (void)context; + (void)kickoff; + (void)param1; +} diff --git a/arch/context.h b/arch/context.h new file mode 100644 index 0000000..ed77323 --- /dev/null +++ b/arch/context.h @@ -0,0 +1,118 @@ +/*! \file + * \brief Functionality required for \ref context_switch "context switching" + */ + +/*! \defgroup context Context Switch + * \brief Low-Level functionality required for context switching + */ + +#pragma once +#include "../types.h" + +/*! \brief Structure for saving the CPU context when switching coroutines. + * \ingroup context + */ +struct Context { + uintptr_t rbx; ///< RBX of the thread + uintptr_t rbp; ///< RBP of the thread + uintptr_t r12; ///< R12 of the thread + uintptr_t r13; ///< R13 of the thread + uintptr_t r14; ///< R14 of the thread + uintptr_t r15; ///< R15 of the thread + void* rsp; ///< Current stack pointer of the thread +} __attribute__((packed)); + +/*! \brief Prepares a context for its first activation. + * + * \ingroup context + * + * On first activation (during *some* context switch), the execution of a + * thread should start at its entry point (typically an implementation of \ref + * Thread::kickoff). + * + * For this, we have to prepare the thread context such that \ref + * context_switch and \ref context_launch can work with it. + * + * Just pushing the entry point onto the stack as a return address is not + * sufficient, however. 
+ * \ref Thread::kickoff requires a pointer to the current thread as a + * parameter, which we also have to transfer. According to the 64 bit systemv + * calling convention, parameters are passed via the volatile registers `rdi, + * rsi, rdx, rcx, r8, r9`. But these are never set during the initial context + * switch (why?). Therefore we pass the parameter using the non-volatile + * register `r15` and use a trampoline function as the actual entry point. See + * \ref fake_systemv_abi for details. + * + * `prepareContext()` can be implemented in the high-level programming language + * C++ (in file `context.cc`). + * + * \param tos Pointer to the top of stack (= address of first byte beyond + * the memory reserved for the stack) + * \param context Reference to the Context structure to be filled + * \param kickoff Pointer to the \ref Thread::kickoff function + * \param param1 first parameter for \ref Thread::kickoff function + */ +/*! + * \todo(14) Implement Function (and helper functions, if required) + */ +void prepareContext(void* tos, Context& context, void (*kickoff)(void*), + void* param1 = nullptr); + +/*! \brief Executes the context switch. + * + * \ingroup context + * + * For a clean context switch, the current register values must be stored in + * the given context struct. Subsequently, these values must be restored + * accordingly from the `next` context struct. + * + * This function must be implemented in assembler in the file `context.asm` + * (why?). It must be declared as `extern "C"`, so assembler functions are not + * C++ name mangled. + * + * \param next Pointer to the structure that the next context will be read + * from + * \param current Pointer to the structure that the current context will be + * stored in + * + * \todo(14) Implement Method + */ +extern "C" void context_switch(Context* next, Context* current); + +/*! \brief Launch context switching.
+ * + * To start context switching, the current context (from the boot-routines) is + * thrown away and the prepared register values within the given `next` context + * replace it. + * + * This function must be implemented in assembler in the file `context.asm` + * (why?). It must be declared as `extern "C"`, so assembler functions are not + * C++ name mangled. + * + * \ingroup context + * + * \param next Pointer to the structure that the next context will be read + * from + * + * \todo(14) Implement Method + */ +extern "C" void context_launch(Context* next); + +/*! \brief Fakes a systemv abi call. + * + * When a thread is first started, only non-volatile registers are "restored" + * from our prepared context (which is where we stored our \ref Thread::kickoff + * parameters). However, the 64 bit calling convention (systemv) dictates that + * parameters are passed via the volatile registers `rdi, rsi, rdx, rcx, r8, + * r9`. In order to call a C++ function, we have to transfer our parameters from + * the non-volatile registers (e.g. `r15, ...`) to the correct volatile ones + * (`rdi, ...`). + * + * This function must be implemented in assembler in the file `context.asm` + * (why?). It must be declared as `extern "C"`, so assembler functions are not + * C++ name mangled. + * \ingroup context + * + * \todo(14) Implement Method + */ +extern "C" void fake_systemv_abi(); diff --git a/arch/core.cc b/arch/core.cc new file mode 100644 index 0000000..94e6646 --- /dev/null +++ b/arch/core.cc @@ -0,0 +1,73 @@ +#include "core.h" + +#include "apic.h" +#include "lapic.h" + +/*! \brief Initial size of CPU core stacks + * + * Used during startup in `boot/startup.asm` + */ +extern "C" const unsigned long CPU_CORE_STACK_SIZE = 4096; + +/*! \brief Reserved memory for CPU core stacks + */ +alignas( + 16) static unsigned char cpu_core_stack[Core::MAX * CPU_CORE_STACK_SIZE]; + +/*!
\brief Pointer to stack memory + * + * Incremented during startup of each core (bootstrap and application + * processors) in `boot/startup.asm` + */ +unsigned char* cpu_core_stack_pointer = cpu_core_stack; + +namespace Core { + +static unsigned cores = 0; ///< Number of available CPU cores +static volatile unsigned + core_id[255]; ///< Lookup table for CPU core IDs with LAPIC ID as index + +static unsigned online_cores = 0; ///< Number of currently online CPU cores +static bool online_core[Core::MAX]; ///< Lookup table for online CPU cores with + ///< CPU core ID as index + +void init() { + // Increment number of online CPU cores + if (__atomic_fetch_add(&online_cores, 1, __ATOMIC_RELAXED) == 0) { + // Fill Lookup table + for (unsigned i = 0; i < Core::MAX; i++) { + uint8_t lapic_id = APIC::getLAPICID(i); + if (lapic_id < APIC::INVALID_ID) { // ignore invalid LAPICs + core_id[lapic_id] = i; + cores++; + } + } + } + + // Get CPU ID + uint8_t cpu = getID(); + + // initialize local APIC with logical APIC ID + LAPIC::init(APIC::getLogicalAPICID(cpu)); + + // set current CPU online + online_core[cpu] = true; +} + +void exit() { + // CPU core offline + online_core[getID()] = false; + __atomic_fetch_sub(&online_cores, 1, __ATOMIC_RELAXED); +} + +unsigned getID() { return core_id[LAPIC::getID()]; } + +unsigned count() { return cores; } + +unsigned countOnline() { return online_cores; } + +bool isOnline(uint8_t core_id) { + return core_id >= Core::MAX ? false : online_core[core_id]; // valid IDs: 0..MAX-1 +} + +} // namespace Core diff --git a/arch/core.h b/arch/core.h new file mode 100644 index 0000000..7e6b07f --- /dev/null +++ b/arch/core.h @@ -0,0 +1,117 @@ +/*! \file + * \brief Access to internals of a CPU \ref Core + */ + +/*! \defgroup sync CPU Synchronization + * + * The synchronization module houses functions useful for orchestrating multiple + * processors and their activities.
Synchronisation, in this case, means + * handling the resource contention between multiple participants, running on + * either the same or different cores. + */ + +#pragma once +#include "../types.h" +#include "core_cr.h" +#include "core_interrupt.h" +#include "core_msr.h" + +/*! \brief Implements an abstraction for CPU internals. + * + * These internals include functions to \ref Core::Interrupt "allow or deny + * interrupts", access \ref Core::CR "control registers". + */ +namespace Core { + +/*! \brief Maximum number of supported CPUs + */ +constexpr unsigned MAX = 8; + +/*! \brief Get the ID of the current CPU core + * using \ref LAPIC::getID() with an internal lookup table. + * + * \return ID of current Core (a number between 0 and \ref Core::MAX) + */ +unsigned getID(); + +/*! \brief Initialize this CPU core + * + * Mark this core as *online* and setup the cores \ref LAPIC by assigning it a + * unique \ref APIC::getLogicalAPICID() "logical APIC ID" + * + * \note Should only be called from \ref kernel_init() during startup. + */ +void init(); + +/*! \brief Deinitialize this CPU core + * + * Mark this Core as *offline* + * + * \note Should only be called from \ref kernel_init() after returning from + * `main()` or `main_ap()`. + */ +void exit(); + +/*! \brief Get number of available CPU cores + * + * \return total number of cores + */ +unsigned count(); + +/*! \brief Get number of successfully started (and currently active) CPU cores + * + * \return total number of online cores + */ +unsigned countOnline(); + +/*! \brief Check if CPU core is currently active + * \param core_id ID of the CPU core + * \return `true` if successfully started and is currently active + */ +bool isOnline(uint8_t core_id); + +/*! \brief Gives the core a hint that it is executing a spinloop and should + * sleep "shortly" + * + * Improves the over-all performance when executing a spinloop by waiting a + * short moment reduce the load on the memory. + * + * \see [ISDMv2, Chapter 4. 
PAUSE - Spin Loop + * Hint](intel_manual_vol2.pdf#page=887) + */ +inline void pause() { asm volatile("pause\n\t" : : : "memory"); } + +/*! \brief Halt the CPU core until the next interrupt. + * + * Halts the current CPU core such that it will wake up on the next interrupt. + * Internally, this function first enables the interrupts via `sti` and then + * halts the core using `hlt`. Halted cores can only be woken by interrupts. The + * effect of `sti` is delayed by one instruction, making the sequence `sti hlt` + * atomic (if interrupts were disabled previously). + * + * \see [ISDMv2, Chapter 4. STI - Set Interrupt + * Flag](intel_manual_vol2.pdf#page=1297) + * \see [ISDMv2, Chapter 3. HLT - Halt](intel_manual_vol2.pdf#page=539) + */ +inline void idle() { asm volatile("sti\n\t hlt\n\t" : : : "memory"); } + +/*! \brief Permanently halts the core. + * + * Permanently halts the current CPU core. Internally, this function first + * disables the interrupts via `cli` and then halts the CPU core using `hlt`. As + * halted CPU cores can only be woken by interrupts, it is guaranteed that this + * core will be halted until the next reboot. The execution of die never + * returns. On multicore systems, only the executing CPU core will be halted + * permanently, other cores will continue execution. + * + * \see [ISDMv2, Chapter 3. CLI - Clear Interrupt + * Flag](intel_manual_vol2.pdf#page=245) + * \see [ISDMv2, Chapter 3. HLT - Halt](intel_manual_vol2.pdf#page=539) + */ +[[noreturn]] inline void die() { + while (true) { + asm volatile("cli\n\t hlt\n\t" : : : "memory"); + } +} + +} // namespace Core diff --git a/arch/core_cr.h b/arch/core_cr.h new file mode 100644 index 0000000..689227e --- /dev/null +++ b/arch/core_cr.h @@ -0,0 +1,83 @@ +/*! \file + * \brief Access to \ref Core::CR "Control Register" of a \ref Core "CPU core" + */ + +#pragma once +#include "../types.h" + +namespace Core { +/*! 
\brief Control Register 0 + * + * \see [ISDMv3, 2.5 Control Registers](intel_manual_vol3.pdf#page=74) + */ +enum CR0 { + CR0_PE = 1 << 0, ///< Protected Mode enabled + CR0_MP = 1 << 1, ///< Monitor co-processor + CR0_EM = 1 << 2, ///< Emulation (no x87 floating-point unit present) + CR0_TS = 1 << 3, ///< Task switched + CR0_ET = 1 << 4, ///< Extension type + CR0_NE = 1 << 15, ///< Numeric error + CR0_WP = 1 << 16, ///< Write protect + CR0_AM = 1 << 18, ///< Alignment mask + CR0_NW = 1 << 29, ///< Not-write through caching + CR0_CD = 1 << 30, ///< Cache disable + CR0_PG = 1 << 31, ///< Paging +}; + +/*! \brief Control Register 4 + * + * \see [ISDMv3, 2.5 Control Registers](intel_manual_vol3.pdf#page=77) + */ +enum CR4 { + CR4_VME = 1 << 0, ///< Virtual 8086 Mode Extensions + CR4_PVI = 1 << 1, ///< Protected-mode Virtual Interrupts + CR4_TSD = 1 << 2, ///< Time Stamp Disable + CR4_DE = 1 << 3, ///< Debugging Extensions + CR4_PSE = 1 << 4, ///< Page Size Extension + CR4_PAE = 1 << 5, ///< Physical Address Extension + CR4_MCE = 1 << 6, ///< Machine Check Exception + CR4_PGE = 1 << 7, ///< Page Global Enabled + CR4_PCE = 1 << 8, ///< Performance-Monitoring Counter enable + CR4_OSFXSR = + 1 << 9, ///< Operating system support for FXSAVE and FXRSTOR instructions + CR4_OSXMMEXCPT = 1 << 10, ///< Operating System Support for Unmasked SIMD + ///< Floating-Point Exceptions + CR4_UMIP = 1 << 11, ///< User-Mode Instruction Prevention + CR4_VMXE = 1 << 13, ///< Virtual Machine Extensions Enable + CR4_SMXE = 1 << 14, ///< Safer Mode Extensions Enable + CR4_FSGSBASE = 1 << 16, ///< Enables the instructions RDFSBASE, RDGSBASE, + ///< WRFSBASE, and WRGSBASE. + CR4_PCIDE = 1 << 17, ///< PCID Enable + CR4_OSXSAVE = 1 << 18, ///< XSAVE and Processor Extended States Enable + CR4_SMEP = 1 << 20, ///< Supervisor Mode Execution Protection Enable + CR4_SMAP = 1 << 21, ///< Supervisor Mode Access Prevention Enable + CR4_PKE = 1 << 22, ///< Protection Key Enable +}; + +/*! 
\brief Access to the Control Register + * + * \see [ISDMv3, 2.5 Control Registers](intel_manual_vol3.pdf#page=73) + * \tparam id Control Register to access + */ +template +class CR { + public: + /*! \brief Read the value of the current Control Register + * + * \return Value stored in the CR + */ + inline static uintptr_t read(void) { + uintptr_t val; + asm volatile("mov %%cr%c1, %0" : "=r"(val) : "n"(id)); + return val; + } + + /*! \brief Write a value into the current Control Register + * + * \param value Value to write into the CR + */ + inline static void write(uintptr_t value) { + asm volatile("mov %0, %%cr%c1" : : "r"(value), "n"(id)); + } +}; +} // namespace Core diff --git a/arch/core_interrupt.h b/arch/core_interrupt.h new file mode 100644 index 0000000..c71c662 --- /dev/null +++ b/arch/core_interrupt.h @@ -0,0 +1,141 @@ +/*! \file + * \brief \ref Core::Interrupt "Interrupt control" and \ref + * Core::Interrupt::Vector "interrupt vector list" + */ + +#pragma once +#include "../types.h" + +namespace Core { +/*! \brief Exception and Interrupt control + * + * \see [ISDMv3, Chapter 6 Interrupt and Exception + * Handling](intel_manual_vol3.pdf#page=185) + */ +namespace Interrupt { + +/*! \brief Bit in `FLAGS` register corresponding to the current interrupt state + */ +constexpr uintptr_t FLAG_ENABLE = 1 << 9; + +/*! \brief List of used interrupt vectors. + * + * The exception vectors from `0` to `31` are reserved for traps, faults and + * aborts. Their behavior is different for each exception, some push an *error + * code*, some are not recoverable. + * + * The vectors from `32` to `255` are user defined interrupts. 
+ * + * \see [ISDMv3, 6.15 Exception and Interrupt + * Reference](intel_manual_vol3.pdf#page=203) + * \todo(12) Add Keyboard and Panic vector numbers + */ +enum Vector { + // Predefined Exceptions + DIVISON_BY_ZERO = + 0, ///< Divide-by-zero Error (at a `DIV`/`IDIV` instruction) + DEBUG = 1, ///< Debug exception + NON_MASKABLE_INTERRUPT = 2, ///< Non Maskable Interrupt + BREAKPOINT = 3, ///< Breakpoint exception (used for debugging) + OVERFLOW = 4, ///< Overflow exception (at `INTO` instruction) + BOUND_RANGE_EXCEEDED = 5, ///< Bound Range Exceeded (at `BOUND` instruction) + INVALID_OPCODE = 6, ///< Opcode at Instruction Pointer is invalid (you + ///< probably shouldn't be here) + DEVICE_NOT_AVAILABLE = + 7, ///< FPU/MMX/SSE instruction but corresponding extension not activated + DOUBLE_FAULT = 8, ///< Exception occurred while trying to call + ///< exception/interrupt handler + // Coprocessor Segment Overrun (Legacy) + INVALID_TSS = + 10, ///< Invalid Task State Segment selector (see error code for index) + SEGMENT_NOT_PRESENT = + 11, ///< Segment not available (see error code for selector index) + STACK_SEGMENT_FAULT = 12, ///< Stack segment not available or invalid (see + ///< error code for selector index) + GENERAL_PROTECTION_FAULT = + 13, ///< Operation not allowed (see error code for selector index) + PAGE_FAULT = 14, ///< Operation on Page (r/w/x) not allowed for current + ///< privilege (error code + `cr2`) + // reserved + FLOATING_POINT_EXCEPTION = 16, ///< x87 FPU error (at `WAIT`/`FWAIT`), + ///< accidentally \ref Core::CR0_NE set? + ALIGNMENT_CHECK = 17, ///< Unaligned memory access in userspace (Exception + ///< activated by \ref Core::CR0_AM) + MACHINE_CHECK = 18, ///< Model specific exception + SIMD_FP_EXCEPTION = + 19, ///< SSE/MMX error (if \ref Core::CR4_OSXMMEXCPT activated) + SECURITY_EXCEPTION = 31, + + // Interrupts +}; +constexpr size_t VECTORS = 256; + +/*! 
\brief Check if interrupts are enabled on this CPU + * + * This is done by pushing the `FLAGS` register onto stack, + * reading it into a register and checking the corresponding bit. + * + * \return `true` if enabled, `false` if disabled + */ +inline bool isEnabled() { + uintptr_t out; + asm volatile( + "pushf\n\t" + "pop %0\n\t" + : "=r"(out) + : + : "memory"); + return (out & FLAG_ENABLE) != 0; +} + +/*! \brief Allow interrupts + * + * Enables interrupt handling by executing the instruction `sti`. + * Since this instruction is delayed by one instruction, a subsequent `nop` is + * executed (to ensure deterministic behavior, independent from the compiler + * generated code) + * + * A pending interrupt (i.e., those arriving while interrupts were disabled) + * will be delivered after re-enabling interrupts. + * + * \see [ISDMv2, Chapter 4. STI - Set Interrupt + * Flag](intel_manual_vol2.pdf#page=1297) + */ +inline void enable() { asm volatile("sti\n\t nop\n\t" : : : "memory"); } + +/*! \brief Forbid interrupts + * + * Prevents interrupt handling by executing the instruction `cli`. + * Will return the previous interrupt state. + * \return `true` if interrupts were enabled at the time of executing this + * function, `false` if they were already disabled. + * + * \see [ISDMv2, Chapter 3. CLI - Clear Interrupt + * Flag](intel_manual_vol2.pdf#page=245) + */ +inline bool disable() { + bool enabled = isEnabled(); + asm volatile("cli\n\t" : : : "memory"); + + return enabled; +} + +/*! \brief Restore interrupt + * + * Restore the interrupt state to the state prior to calling \ref disable() by + * using its return value. + * + * \note This function will never disable interrupts, even if val is false! + * This function is designed to allow nested disabling and restoring of + * the interrupt state. + * + * \param val if set to `true`, interrupts will be enabled; nothing will happen + * on false.
+ */ +inline void restore(bool val) { + if (val) { + enable(); + } +} +} // namespace Interrupt +} // namespace Core diff --git a/arch/core_msr.h b/arch/core_msr.h new file mode 100644 index 0000000..0e3f16d --- /dev/null +++ b/arch/core_msr.h @@ -0,0 +1,103 @@ +/*! \file + * \brief \ref Core::MSRs "Identifiers" for \ref Core::MSR "Model-Specific + * Register" + */ + +#pragma once +#include "../types.h" + +namespace Core { +/*! \brief Model-Specific Register Identifiers + * + * Selection of useful identifiers. + * + * \see [ISDMv4](intel_manual_vol4.pdf) + */ +enum MSRs { + MSR_PLATFORM_INFO = + 0xce, ///< Platform information including bus frequency (Intel) + MSR_TSC_DEADLINE = 0x6e0, ///< Register for \ref LAPIC::Timer Deadline mode + // Fast system calls + // XXX: Remove if we don't do fast syscalls + MSR_EFER = + 0xC0000080, ///< Extended Feature Enable Register, \see Core::MSR_EFER + MSR_STAR = 0xC0000081, ///< eip (protected mode), ring 0 and 3 segment bases + MSR_LSTAR = 0xC0000082, ///< rip (long mode) + MSR_SFMASK = 0xC0000084, ///< lower 32 bit: flag mask, if bit is set + ///< corresponding rflag is cleared through syscall + + // CPU local variables + MSR_FS_BASE = 0xC0000100, + MSR_GS_BASE = 0xC0000101, ///< Current GS base pointer + MSR_SHADOW_GS_BASE = 0xC0000102, ///< Usually called `MSR_KERNEL_GS_BASE` but + ///< this is misleading +}; + +/* \brief Important bits in Extended Feature Enable Register (EFER) + * + * \see [ISDMv3, 2.2.1 Extended Feature Enable + * Register](intel_manual_vol3.pdf#page=69) + * \see [AAPMv2, 3.1.7 Extended Feature Enable + * Register](amd64_manual_vol2.pdf#page=107) + */ +enum MSR_EFER { + MSR_EFER_SCE = 1 << 0, ///< System Call Extensions + MSR_EFER_LME = 1 << 8, ///< Long mode enable + MSR_EFER_LMA = 1 << 10, ///< Long mode active + MSR_EFER_NXE = 1 << 11, ///< No-Execute Enable + MSR_EFER_SVME = 1 << 12, ///< Secure Virtual Machine Enable + MSR_EFER_LMSLE = 1 << 13, ///< Long Mode Segment Limit Enable + MSR_EFER_FFXSR = 
1 << 14, ///< Fast `FXSAVE`/`FXRSTOR` instruction + MSR_EFER_TCE = 1 << 15, ///< Translation Cache Extension +}; + +/*! \brief Access to the Model-Specific Register (MSR) + * + * \see [ISDMv3, 9.4 Model-Specific Registers + * (MSRs)](intel_manual_vol3.pdf#page=319) + * \see [ISDMv4](intel_manual_vol4.pdf) + * \tparam id ID of the Model-Specific Register to access + */ +template +class MSR { + /*! \brief Helper to access low and high bits of a 64 bit value + * \internal + */ + union uint64_parts { + struct { + uint32_t low; + uint32_t high; + } __attribute__((packed)); + uint64_t value; + + explicit uint64_parts(uint64_t value = 0) : value(value) {} + }; + + public: + /*! \brief Read the value of the current MSR + * + * \return Value stored in the MSR + * + * \see [ISDMv2, Chapter 4. RDMSR - Read from Model Specific + * Register](intel_manual_vol2.pdf#page=1186) + */ + static inline uint64_t read() { + uint64_parts p; + asm volatile("rdmsr \n\t" : "=a"(p.low), "=d"(p.high) : "c"(id)); + return p.value; + } + + /*! \brief Write a value into the current MSR + * + * \param value Value to write into the MSR + * + * \see [ISDMv2, Chapter 5. WRMSR - Write to Model Specific + * Register](intel_manual_vol2.pdf#page=1912) + */ + static inline void write(uint64_t value) { + uint64_parts p(value); + asm volatile("wrmsr \n\t" : : "c"(id), "a"(p.low), "d"(p.high)); + } +}; + +} // namespace Core diff --git a/arch/cpuid.h b/arch/cpuid.h new file mode 100644 index 0000000..1bed233 --- /dev/null +++ b/arch/cpuid.h @@ -0,0 +1,201 @@ +/*! \file + * \brief \ref CPUID queries information about the processor + */ +#pragma once +#include "../types.h" + +/*! \brief Query information about the processor + * + * \note This is an interface to the `cpuid` instruction, which can return + * information about the processor. It should therefor **not** be confused with + * functionality to + * \ref Core::getID() "retrieve the ID of the current CPU (core)"! + */ +namespace CPUID { + +/*! 
\brief Structure for register values returned by `cpuid` instruction + */ +union Reg { + struct { + uint32_t ebx, edx, ecx, eax; + }; + char value[16]; +}; + +enum Function { + HIGHEST_FUNCTION_PARAMETER = 0x0, ///< Maximum Input Value for Basic CPUID + ///< Information (in register `eax`) + MANUFACTURER_ID = 0x0, ///< CPU String (in register `ebx`, `ecx` and `edx`) + PROCESSOR_INFO = 0x1, ///< Version Information like Type, Family, Model (in + ///< register `eax`) + FEATURE_BITS = 0x1, ///< Feature Information (in register `ecx` and `edx`) + CACHE_INFORMATION = 0x2, ///< Cache and TLB Information + PROCESSOR_SERIAL_NUMBER = 0x3, ///< deprecated + HIGHEST_EXTENDED_FUNCTION = + 0x80000000, ///< Maximum Input Value for Extended Function CPUID (in + ///< register `eax`) + EXTENDED_PROCESSOR_INFO = 0x80000001, ///< Extended Processor Signature and + ///< Feature Bits (in register `eax`) + EXTENDED_FEATURE_BITS = 0x80000001, ///< Extended Feature Information (in + ///< register `ecx` and `edx`) + PROCESSOR_BRAND_STRING_1 = 0x80000002, ///< Processor Brand String (1/3) + PROCESSOR_BRAND_STRING_2 = 0x80000003, ///< Processor Brand String (2/3) + PROCESSOR_BRAND_STRING_3 = 0x80000004, ///< Processor Brand String (3/3) + ADVANCED_POWER_MANAGEMENT = 0x80000007, ///< Advanced Power Management (with + ///< Invariant TSC in register `edx`) + ADDRESS_SIZES = + 0x80000008, ///< Linear/Physical Address size (in register `eax`) +}; + +/*! \brief Get CPU identification and feature information + * + * \param eax Requested feature + * \return Register values filled by instruction `cpuid` for the requested + * feature + * + * \see [ISDMv2, Chapter 3. 
CPUID - CPU + * Identification](intel_manual_vol2.pdf#page=292) + */ +inline Reg get(Function eax) { + Reg r; + asm volatile("cpuid \n\t" + : "=a"(r.eax), "=b"(r.ebx), "=c"(r.ecx), "=d"(r.edx) + : "0"(eax)); + return r; +} + +enum FeatureECX { + FEATURE_SSE3 = 1 << 0, ///< Prescott New Instructions-SSE3 (PNI) + FEATURE_PCLMUL = 1 << 1, ///< Carry-less Multiplication + FEATURE_DTES64 = 1 << 2, ///< 64-bit debug store (edx bit 21) + FEATURE_MONITOR = 1 << 3, ///< MONITOR and MWAIT instructions (SSE3) + FEATURE_DS_CPL = 1 << 4, ///< CPL qualified debug store + FEATURE_VMX = 1 << 5, ///< Virtual Machine eXtensions + FEATURE_SMX = 1 << 6, ///< Safer Mode Extensions (LaGrande) + FEATURE_EST = 1 << 7, ///< Enhanced SpeedStep + FEATURE_TM2 = 1 << 8, ///< Thermal Monitor 2 + FEATURE_SSSE3 = 1 << 9, ///< Supplemental SSE3 instructions + FEATURE_CID = 1 << 10, ///< L1 Context ID + FEATURE_SDBG = 1 << 11, ///< Silicon Debug interface + FEATURE_FMA = 1 << 12, ///< Fused multiply-add (FMA3) + FEATURE_CX16 = 1 << 13, ///< CMPXCHG16B instruction + FEATURE_ETPRD = 1 << 14, ///< Can disable sending task priority messages + FEATURE_PDCM = 1 << 15, ///< Perfmon & debug capability + FEATURE_PCIDE = 1 << 17, ///< Process context identifiers (CR4 bit 17) + FEATURE_DCA = 1 << 18, ///< Direct cache access for DMA writes + FEATURE_SSE4_1 = 1 << 19, ///< SSE4.1 instructions + FEATURE_SSE4_2 = 1 << 20, ///< SSE4.2 instructions + FEATURE_X2APIC = 1 << 21, ///< x2APIC + FEATURE_MOVBE = 1 << 22, ///< MOVBE instruction (big-endian) + FEATURE_POPCNT = 1 << 23, ///< POPCNT instruction + FEATURE_TSC_DEADLINE = + 1 + << 24, ///< APIC implements one-shot operation using a TSC deadline value + FEATURE_AES = 1 << 25, ///< AES instruction set + FEATURE_XSAVE = 1 << 26, ///< XSAVE, XRESTOR, XSETBV, XGETBV + FEATURE_OSXSAVE = 1 << 27, ///< XSAVE enabled by OS + FEATURE_AVX = 1 << 28, ///< Advanced Vector Extensions + FEATURE_F16C = 1 << 29, ///< F16C (half-precision) FP feature + FEATURE_RDRND = + 1 << 30, 
///< RDRAND (on-chip random number generator) feature + FEATURE_HYPERVISOR = + 1 << 31 ///< Hypervisor present (always zero on physical CPUs) +}; + +enum FeatureEDX { + FEATURE_FPU = 1 << 0, ///< Onboard x87 FPU + FEATURE_VME = + 1 << 1, ///< Virtual 8086 mode extensions (such as VIF, VIP, PIV) + FEATURE_DE = 1 << 2, ///< Debugging extensions (CR4 bit 3) + FEATURE_PSE = 1 << 3, ///< Page Size Extension + FEATURE_TSC = 1 << 4, ///< Time Stamp Counter + FEATURE_MSR = 1 << 5, ///< Model-specific registers + FEATURE_PAE = 1 << 6, ///< Physical Address Extension + FEATURE_MCE = 1 << 7, ///< Machine Check Exception + FEATURE_CX8 = 1 << 8, ///< CMPXCHG8 (compare-and-swap) instruction + FEATURE_APIC = + 1 << 9, ///< Onboard Advanced Programmable Interrupt Controller + FEATURE_SEP = 1 << 11, ///< SYSENTER and SYSEXIT instructions + FEATURE_MTRR = 1 << 12, ///< Memory Type Range Registers + FEATURE_PGE = 1 << 13, ///< Page Global Enable bit in CR4 + FEATURE_MCA = 1 << 14, ///< Machine check architecture + FEATURE_CMOV = 1 << 15, ///< Conditional move and FCMOV instructions + FEATURE_PAT = 1 << 16, ///< Page Attribute Table + FEATURE_PSE36 = 1 << 17, ///< 36-bit page size extension + FEATURE_PSN = 1 << 18, ///< Processor Serial Number + FEATURE_CLF = 1 << 19, ///< CLFLUSH instruction (SSE2) + FEATURE_DTES = 1 << 21, ///< Debug store: save trace of executed jumps + FEATURE_ACPI = 1 << 22, ///< Onboard thermal control MSRs for ACPI + FEATURE_MMX = 1 << 23, ///< MMX instructions + FEATURE_FXSR = 1 << 24, ///< FXSAVE, FXRESTOR instructions, CR4 bit 9 + FEATURE_SSE = 1 << 25, ///< SSE instructions (a.k.a. 
Katmai New Instructions) + FEATURE_SSE2 = 1 << 26, ///< SSE2 instructions + FEATURE_SS = 1 << 27, ///< CPU cache implements self-snoop + FEATURE_HTT = 1 << 28, ///< Hyper-threading + FEATURE_TM1 = 1 << 29, ///< Thermal monitor automatically limits temperature + FEATURE_IA64 = 1 << 30, ///< IA64 processor emulating x86 + FEATURE_PBE = 1 << 31 ///< Pending Break Enable (PBE# pin) wakeup capability +}; + +enum ExtendedFeatureEDX { + EXTENDED_FEATURE_FPU = 1 << 0, ///< Onboard x87 FPU + EXTENDED_FEATURE_VME = + 1 << 1, ///< Virtual 8086 mode extensions (such as VIF, VIP, PIV) + EXTENDED_FEATURE_DE = 1 << 2, ///< Debugging extensions (CR4 bit 3) + EXTENDED_FEATURE_PSE = 1 << 3, ///< Page Size Extension + EXTENDED_FEATURE_TSC = 1 << 4, ///< Time Stamp Counter + EXTENDED_FEATURE_MSR = 1 << 5, ///< Model-specific registers + EXTENDED_FEATURE_PAE = 1 << 6, ///< Physical Address Extension + EXTENDED_FEATURE_MCE = 1 << 7, ///< Machine Check Exception + EXTENDED_FEATURE_CX8 = 1 << 8, ///< CMPXCHG8 (compare-and-swap) instruction + EXTENDED_FEATURE_APIC = + 1 << 9, ///< Onboard Advanced Programmable Interrupt Controller + EXTENDED_FEATURE_SYSCALL = 1 << 11, ///< SYSCALL and SYSRET instructions + EXTENDED_FEATURE_MTRR = 1 << 12, ///< Memory Type Range Registers + EXTENDED_FEATURE_PGE = 1 << 13, ///< Page Global Enable bit in CR4 + EXTENDED_FEATURE_MCA = 1 << 14, ///< Machine check architecture + EXTENDED_FEATURE_CMOV = 1 << 15, ///< Conditional move and FCMOV instructions + EXTENDED_FEATURE_PAT = 1 << 16, ///< Page Attribute Table + EXTENDED_FEATURE_PSE36 = 1 << 17, ///< 36-bit page size extension + EXTENDED_FEATURE_MP = 1 << 19, ///< Multiprocessor Capable + EXTENDED_FEATURE_NX = 1 << 20, ///< Non-executable bit + EXTENDED_FEATURE_MMXEXT = 1 << 22, ///< extended MMX instructions + EXTENDED_FEATURE_MMX = 1 << 23, ///< MMX instructions + EXTENDED_FEATURE_FXSR = 1 + << 24, ///< FXSAVE, FXRESTOR instructions, CR4 bit 9 + EXTENDED_FEATURE_FXSR_OPT = 1 << 25, ///< FXSAVE, FXRESTOR 
optimizations + EXTENDED_FEATURE_PDPE1GB = 1 << 26, ///< Gibibyte Pages + EXTENDED_FEATURE_RDTSCP = 1 << 27, ///< RDTSCP instruction + EXTENDED_FEATURE_LM = 1 << 29, ///< Long Mode (x64) + EXTENDED_FEATURE_3DNOWEXT = 1 << 30, ///< Extended 3DNow! instructions + EXTENDED_FEATURE_3DNOW = 1 << 31 ///< 3DNow! instructions +}; + +/*! \brief Check if feature is provided by this system + * + * \param feature Feature to test + * \return `true` if available, `false` otherwise + */ +inline bool has(enum FeatureECX feature) { + return (get(FEATURE_BITS).ecx & feature) != 0; +} + +/*! \brief Check if feature is provided by this system + * + * \param feature Feature to test + * \return `true` if available, `false` otherwise + */ +inline bool has(enum FeatureEDX feature) { + return (get(FEATURE_BITS).edx & feature) != 0; +} + +/*! \brief Check if feature is provided by this system + * + * \param feature Extended feature to test + * \return `true` if available, `false` if either feature or extended features + * are unavailable + */ +inline bool has(enum ExtendedFeatureEDX feature) { + return (get(EXTENDED_FEATURE_BITS).edx & feature) != 0; +} +} // namespace CPUID diff --git a/arch/gdt.cc b/arch/gdt.cc new file mode 100644 index 0000000..8f8d0f9 --- /dev/null +++ b/arch/gdt.cc @@ -0,0 +1,36 @@ +#include "gdt.h" + +#include "core.h" + +namespace GDT { + +// The static 32-bit Global Descriptor Table (GDT) +alignas(16) constinit SegmentDescriptor protected_mode[] = { + // Null descriptor + {}, + + // Global code segment from 0-4GB + SegmentDescriptor::Segment(0, UINT32_MAX, true, 0, SIZE_32BIT), + + // Global data segment from 0-4GB + SegmentDescriptor::Segment(0, UINT32_MAX, false, 0, SIZE_32BIT), +}; +extern "C" constexpr Pointer gdt_protected_mode_pointer(protected_mode); + +// The static 64-bit Global Descriptor Table (GDT) +// \see [ISDMv3 3.2.4 Segmentation in IA-32e +// Mode](intel_manual_vol3.pdf#page=91) +alignas(16) constinit SegmentDescriptor long_mode[] = {
// Null descriptor + SegmentDescriptor::Null(), + + // Global code segment + SegmentDescriptor::Segment64(true, 0), + + // Global data segment + SegmentDescriptor::Segment64(false, 0), + +}; +extern "C" constexpr Pointer gdt_long_mode_pointer(long_mode); + +} // namespace GDT diff --git a/arch/gdt.h b/arch/gdt.h new file mode 100644 index 0000000..347532b --- /dev/null +++ b/arch/gdt.h @@ -0,0 +1,199 @@ +/*! \file + * \brief The \ref GDT "Global Descriptor Table (GDT)". + */ + +#pragma once +#include "../types.h" + +/*! \brief Abstracts the GDT that, primarily, contains descriptors to memory + * segments. + * \ingroup memory + * + * The GDT is a table that primarily contains segment descriptors. Segment + * descriptors has a size of 8 Bytes and contains the size, position, access + * rights, and purpose of such a segment. Unlike the LDT, the GDT is shared + * between all processes and may contain TSS and LDT descriptors. For the + * kernel, the first entry is required to be a null descriptor and the code and + * data segments. To support user-mode processes, additional TSS, code, and data + * segments for ring 3 must be added. + * + * The base address and size of the GDT are written to the GDTR register during + * boot (via. `lgdt`). + * + * \see [ISDMv3, 2.4.1; Global Descriptor Table Register + * (GDTR)](intel_manual_vol3.pdf#page=72) + * \see [ISDMv3, 3.5.1; Segment + * Descriptor Tables](intel_manual_vol3.pdf#page=99) + */ +namespace GDT { + +enum Segments { + SEGMENT_NULL = 0, + SEGMENT_KERNEL_CODE, + SEGMENT_KERNEL_DATA, +}; + +/*! \brief Unit of the segment limit + */ +enum Granularity { + GRANULARITY_BYTES = 0, ///< Segment limit in Bytes + GRANULARITY_4KBLOCK = 1 ///< Segment limit in blocks of 4 Kilobytes +}; + +/*! \brief Descriptor type */ +enum DescriptorType { + DESCRIPTOR_SYSTEM = 0, ///< entry is a system segment + DESCRIPTOR_CODEDATA = 1, ///< entry is a code/data segment +}; + +/*! 
\brief Address width + */ +enum Size { + SIZE_16BIT = 0, ///< 16-bit (D/B = 0, L = 0) + SIZE_32BIT = 2, ///< 32-bit (D/B = 1, L = 0) + SIZE_64BIT = 1, ///< 64-bit (D/B = 0, L = 1) +}; + +/*! \brief Type flags for used descriptor types + */ +enum TypeFlags { + TYPE_DATA_RW = 0b0010ull, ///< Data rw, not expanding down + TYPE_CODE_RX = 0b1010ull, ///< Code rx, non-conforming +}; + +/*! \brief Describes the structure of segment descriptors + * + * A data structure that contains size, position, access rights, and purpose of + * any segment. Segment descriptors are used in both the GDT, as well as in + * LDTs. + * + * \see [ISDMv3, 3.4.5; Segment Descriptors](intel_manual_vol3.pdf#page=95) + * \see [AAPMv2, 4.7 Legacy Segment Descriptors](amd64_manual_vol2.pdf#page=132) + * \see [AAPMv2, 4.8 Long-Mode Segment + * Descriptors](amd64_manual_vol2.pdf#page=140) + */ +union SegmentDescriptor { + // Universally valid values (shared across all segment types) + struct { + uint64_t limit_low : 16; ///< Least-significant bits of segment size + ///< (influenced by granularity!) + uint64_t base_low : 24; ///< Least-significant bits of base address + uint64_t + type : 4; ///< Meaning of those 4 bits depends on descriptor_type below + DescriptorType descriptor_type : 1; ///< Descriptor type (influences the + ///< meaning of the 4 bits above) + uint64_t privilege_level : 2; ///< Ring for this segment + bool present : 1; ///< Entry is valid iff set to `true` + uint64_t limit_high : 4; ///< Most-significant bits of segment size + bool available : 1; ///< Bit which can be used for other purposes (in + ///< software) + uint64_t custom : 2; ///< Meaning of those 2 bits relates to descriptor_type + ///< and type + Granularity + granularity : 1; ///< Unit used as granularity for the segment limit + uint64_t base_high : 8; ///< Most-significant bits of base address + } __attribute__((packed)); + + uint64_t value; ///< Merged value + + /*! \brief Explicitly constructs a null descriptor. 
+ */ + consteval static SegmentDescriptor Null() { + return SegmentDescriptor{ + .value = 0, + }; + } + + /*! \brief Constructs a code/data segment descriptor. + * \param base Base Address of segment + * \param limit Size of segment + * \param code Code or data segment + * \param ring Privilege level + * \param size Address width + */ + consteval static SegmentDescriptor Segment(uintptr_t base, uint32_t limit, + bool code, uint64_t ring, + Size size) { + return SegmentDescriptor{ + .limit_low = limit >> (limit > 0xFFFFF ? 12 : 0) & 0xFFFF, + .base_low = base & 0xFFFFFF, + .type = code ? TYPE_CODE_RX : TYPE_DATA_RW, + .descriptor_type = DESCRIPTOR_CODEDATA, + .privilege_level = ring, + .present = true, + .limit_high = (limit > 0xFFFFF ? (limit >> 28) : (limit >> 16)) & 0xF, + .available = false, + .custom = size, + .granularity = + limit > 0xFFFFF ? GRANULARITY_4KBLOCK : GRANULARITY_BYTES, + .base_high = (base >> 24) & 0xFF, + }; + } + + /*! \brief Constructs a 64bit code/data segment descriptor. + * \param code Code or data segment + * \param ring Privilege level + */ + consteval static SegmentDescriptor Segment64(bool code, int ring) { + return SegmentDescriptor::Segment(0, 0, code, ring, SIZE_64BIT); + } + +} __attribute__((packed)); + +static_assert(sizeof(SegmentDescriptor) == 8, + "GDT::SegmentDescriptor has wrong size"); + +/*! \brief Structure that describes a GDT Pointer (aka GDT Descriptor) + * + * It contains both the length (in bytes) of the GDT (minus 1 byte) and the + * pointer to the GDT. The pointer to the GDT can be loaded using the + * instruction `lgdt`. 
+ * + * \note As Intel uses little endian for representing multi-byte values, the + * GDT::Pointer structure can be used for 16, 32, and 64 bit descriptor tables: + * \verbatim + * | 16 bit | 16 bit | 16 bit | 16 bit | 16 bit | + * +--------+---------------------------------------+ + * Pointer | limit | base (up to 64 bit) | + * +--------+---------+---------+---------+---------+ + * | used for 16 bit | ignored... | + * | used for 32 bit | ignored... | + * | used for 64 bit | + * \endverbatim + * + * \see [ISDMv3, Figure 2-6; Memory Management + * Registers](intel_manual_vol3.pdf#page=72) + */ +struct Pointer { + uint16_t limit; //!< GDT size in bytes (minus 1 byte) + void* base; //!< GDT base address + + /*! \brief Constructor (automatic length) + * \tparam T Element type of the descriptor array + * \tparam LEN Number of entries in the descriptor array (deduced) + * \param desc Array of GDT segment descriptors -- must be defined in the same + * module! + */ + template <typename T, size_t LEN> + explicit constexpr Pointer(const T (&desc)[LEN]) + : limit(LEN * sizeof(T) - 1), base(const_cast<T*>(desc)) {} + + /*! \brief Constructor + * \param desc Address of the GDT segment descriptors + * \param len Number of entries + */ + consteval Pointer(void* desc, size_t len) + : limit(len * sizeof(SegmentDescriptor) - 1), base(desc) {} + + /*! 
\brief Set an address + * \note On change, `lgdt` must be executed again + * \param desc Address of the GDT segment descriptors + * \param len Number of entries + */ + constexpr void set(void* desc, size_t len) { + limit = len * sizeof(SegmentDescriptor) - 1; + base = desc; + } +} __attribute__((packed)); + +static_assert(sizeof(Pointer) == 10, "GDT::Pointer has wrong size"); + +} // namespace GDT diff --git a/arch/idt.cc b/arch/idt.cc new file mode 100644 index 0000000..d610859 --- /dev/null +++ b/arch/idt.cc @@ -0,0 +1,35 @@ +#include "idt.h" + +#include "core_interrupt.h" + +namespace IDT { + +// Interrupt Descriptor Table, 8 Byte aligned +constinit struct InterruptDescriptor idt[256] = {}; + +// Struct used for loading (the address of) the Interrupt Descriptor Table into +// the IDT-Register +struct Register { + uint16_t limit; // Address of the last valid byte (relative to base) + struct InterruptDescriptor* base; + // `max` is the highest valid vector index, hence (max + 1) table entries + explicit Register(uint8_t max = 255) { + limit = (max + static_cast<uint16_t>(1)) * sizeof(InterruptDescriptor) - 1; + base = idt; + } +} __attribute__((packed)); + +static_assert(sizeof(InterruptDescriptor) == 16, + "IDT::InterruptDescriptor has wrong size"); +static_assert(sizeof(Register) == 10, "IDT::Register has wrong size"); +static_assert(alignof(decltype(idt)) % 8 == 0, "IDT must be 8 byte aligned!"); + +void load() { + // Create structure required for writing to idtr and load via lidt + Register idtr(Core::Interrupt::VECTORS - 1); + asm volatile("lidt %0\n\t" ::"m"(idtr)); +} + +void set(Core::Interrupt::Vector vector, InterruptDescriptor descriptor) { + idt[(uint8_t)vector] = descriptor; +} +} // namespace IDT diff --git a/arch/idt.h b/arch/idt.h new file mode 100644 index 0000000..ac97355 --- /dev/null +++ b/arch/idt.h @@ -0,0 +1,210 @@ +/*! \file + * \brief \ref IDT "Interrupt Descriptor Table (IDT)" containing the entry + * points for interrupt handling. 
+ */ + +#pragma once +#include "../types.h" +#include "core_interrupt.h" + +/*! 
\brief Interrupt Descriptor Table (IDT) + * \ingroup interrupts + * + * \see [ISDMv3 6.14 Exception and Interrupt Handling in 64-bit + * Mode](intel_manual_vol3.pdf#page=200) + */ + +/*! \brief Preserved interrupt context + * + * After an interrupt was triggered, the core first saves the basic context + * (current code- & stack segment, instruction & stack pointer and the status + * flags register) and looks up the handling function for the vector using the + * \ref IDT. No other registers are saved or restored automatically. It is the + * handler's (our) job to save and restore all modified registers. However, most + * handlers in StuBS are implemented directly in C++ utilizing the `interrupt` + * attribute: The compiler treats all modified registers as callee-saved, which + * saves us a lot of work, but prevents us from knowing the exact contents of + * each register (we don't know if/when the compiler modified it). `interrupt` + * functions receive up to two parameters: A pointer to this \ref + * InterruptContext and, depending on the interrupt, an error code, which is + * also pushed onto the stack by the CPU. Contrary to "normal" functions, the + * compiler will return using the `iret` instruction. + */ +struct InterruptContext { + // Context saved by CPU + // uintptr_t error_code; ///< Error Code + uintptr_t ip; ///< Instruction Pointer (at interrupt) + uintptr_t cs : 16; ///< Code segment (in case of a ring switch it is the + ///< segment of the user mode) + uintptr_t : 0; ///< Alignment (due to 16 bit code segment) + uintptr_t flags; ///< Status flags register + uintptr_t sp; ///< Stack pointer (at interrupt) + uintptr_t ss : 16; ///< Stack segment (in case of a ring switch it is the + ///< segment of the user mode) + uintptr_t : 0; ///< Alignment (due to 16 bit stack segment) +} __attribute__((packed)); +static_assert(sizeof(InterruptContext) == 5 * 8, + "InterruptContext has wrong size"); + +namespace IDT { +/*! 
\brief Gate types + * + * \see [ISDMv3 3.5 System Descriptor Types](intel_manual_vol3.pdf#page=99) + */ +enum Gate { + GATE_INT = 0x6, ///< Interrupt Gate (CPU disables interrupts upon entry) + GATE_TRAP = 0x7, ///< Trap Gate (interrupts remain enabled upon entry) +}; + +/*! \brief Gate size + * + * \see [ISDMv3 3.5 System Descriptor Types](intel_manual_vol3.pdf#page=99) + */ +enum GateSize { + GATE_SIZE_16 = 0, ///< 16 bit + GATE_SIZE_32 = 1, ///< 32 bit + GATE_SIZE_64 = 1, ///< 64 bit (same encoding as 32 bit) +}; + +/*! \brief Descriptor Privilege Level + */ +enum DPL { + DPL_KERNEL = 0, ///< Ring 0 / Kernel mode + /* DPLs 1 and 2 are unused */ + DPL_USER = 3, ///< Ring 3 / User mode +}; + +/*! \brief Interrupt handler that returns after execution (trap/fault). + */ +using ReturningHandler = void (*)(InterruptContext*); + +/*! \brief Interrupt handler that returns after execution (trap/fault) and + * receives an error code. + */ +using ReturningHandlerWithError = void (*)(InterruptContext*, uint64_t); + +/*! \brief Interrupt handler that does **not** return after execution (abort). + */ +using DivergingHandler = void (*)(InterruptContext*); + +/*! \brief Interrupt handler that does **not** return after execution (abort) + * and receives an error code. + */ +using DivergingHandlerWithError = void (*)(InterruptContext*, uint64_t); + +/*! 
\brief Interrupt Descriptor stored in the Interrupt-Descriptor Table (IDT) + */ +struct alignas(8) InterruptDescriptor { + uint16_t address_low; ///< lower interrupt function offset + uint16_t selector; ///< code segment selector in GDT or LDT + union { + struct { + uint8_t ist : 3; ///< Interrupt Stack Index + uint8_t : 5; ///< unused, has to be 0 + Gate type : 3; ///< gate type + GateSize size : 1; ///< gate size + uint8_t : 1; ///< unused, has to be 0 + DPL dpl : 2; ///< descriptor privilege level + bool present : 1; ///< present: 1 for interrupts + } __attribute__((packed)); + uint16_t flags; + }; + uint64_t address_high : 48; ///< higher interrupt function offset + uint64_t : 0; ///< fill until aligned with 64 bit + + /*! \brief Create a non-present interrupt descriptor. + */ + InterruptDescriptor() = default; + + /*! \brief Create an interrupt descriptor. + * + * + * \param handler Entry point for interrupt handling + * \param ist Stack index from the \ref TaskStateSegment for the interrupt + * handler. Set to 0 to use current stack. + * \param dpl Permissions required for enter this interrupt handler + * (kernel- or user space) + */ + InterruptDescriptor(uintptr_t handler, uint8_t ist, DPL dpl) + : address_low(handler & 0xffff), + selector(8), // XXX: This should come from `Segments` + ist(ist), + type(GATE_INT), + size(GATE_SIZE_64), + dpl(dpl), + present(true), + address_high((handler >> 16) & 0xffffffffffff) {} + + /*! \brief Create an interrupt descriptor for a handler that does return + * (trap/fault). + * + * \param handler Entry point for interrupt handling + * \param ist Stack index from the \ref TaskStateSegment for the interrupt + * handler. Set to 0 to use current stack. + * \param dpl Permissions required for enter this interrupt handler + * (kernel- or user space) + */ + static InterruptDescriptor Returning(ReturningHandler handler, + uint8_t ist = 0, DPL dpl = DPL_KERNEL) { + return {reinterpret_cast(handler), ist, dpl}; + } + + /*! 
\brief Create an interrupt descriptor for a handler that does return + * (traps/fault) and receives an error code. + * + * + * \param handler Entry point for interrupt handling + * \param ist Stack index from the \ref TaskStateSegment for the interrupt + * handler. Set to 0 to use current stack. + * \param dpl Permissions required for enter this interrupt handler + * (kernel- or user space) + */ + static InterruptDescriptor ReturningWithError( + ReturningHandlerWithError handler, uint8_t ist = 0, + DPL dpl = DPL_KERNEL) { + return {reinterpret_cast(handler), ist, dpl}; + } + + /*! \brief Create an interrupt descriptor for a handler that does **not** + * return (abort). + * + * + * \param handler Entry point for interrupt handling + * \param ist Stack index from the \ref TaskStateSegment for the interrupt + * handler. Set to 0 to use current stack. + * \param dpl Permissions required for enter this interrupt handler + * (kernel- or user space) + */ + static InterruptDescriptor Diverging(DivergingHandler handler, + uint8_t ist = 0, DPL dpl = DPL_KERNEL) { + return {reinterpret_cast(handler), ist, dpl}; + } + + /*! \brief Create an interrupt descriptor for a handler that does **not** + * return (abort) and receives an error code. + * + * + * \param handler Entry point for interrupt handling + * \param ist Stack index from the \ref TaskStateSegment for the interrupt + * handler. Set to 0 to use current stack. + * \param dpl Permissions required for enter this interrupt handler + * (kernel- or user space) + */ + static InterruptDescriptor DivergingWithError( + DivergingHandlerWithError handler, uint8_t ist = 0, + DPL dpl = DPL_KERNEL) { + return {reinterpret_cast(handler), ist, dpl}; + } + +} __attribute__((packed)); +static_assert(sizeof(InterruptDescriptor) == 16, + "IDT::InterruptDescriptor has wrong size"); + +/*! \brief Load the IDT's address and size into the IDT-Register via `idtr`. + */ +void load(); + +/*! 
\brief Set the idt entry for the given interrupt vector. + */ +void set(Core::Interrupt::Vector vector, InterruptDescriptor descriptor); +} // namespace IDT diff --git a/arch/ioapic.cc b/arch/ioapic.cc new file mode 100644 index 0000000..28606bb --- /dev/null +++ b/arch/ioapic.cc @@ -0,0 +1,44 @@ +#include "ioapic.h" + +namespace IOAPIC { +/*! \brief IOAPIC registers memory mapped into the CPU's address space. + * + * Access to the actual IOAPIC registers can be obtained by performing the + * following steps: + * 1. Write the number of the IOAPIC register to the address stored in + * `IOREGSEL_REG` + * 2. Read the value from / write the value to the address referred to by + * `IOWIN_REG`. + * + * \see [IO-APIC manual](intel_ioapic.pdf#page=8) + */ +volatile Index *IOREGSEL_REG = reinterpret_cast<volatile Index *>(0xfec00000); +/// \copydoc IOREGSEL_REG +volatile Register *IOWIN_REG = + reinterpret_cast<volatile Register *>(0xfec00010); + +// IOAPIC manual, p. 8 +const Index IOAPICID_IDX = 0x00; +const Index IOREDTBL_IDX = 0x10; + +const uint8_t slot_max = 24; + +void init() {} + +void config(uint8_t slot, Core::Interrupt::Vector vector, + TriggerMode trigger_mode, Polarity polarity) { + (void)slot; + (void)vector; + (void)trigger_mode; + (void)polarity; +} + +void allow(uint8_t slot) { (void)slot; } + +void forbid(uint8_t slot) { (void)slot; } + +bool status(uint8_t slot) { + (void)slot; + return false; +} +} // namespace IOAPIC diff --git a/arch/ioapic.h b/arch/ioapic.h new file mode 100644 index 0000000..2f3480f --- /dev/null +++ b/arch/ioapic.h @@ -0,0 +1,82 @@ +/*! \file + * \brief \ref IOAPIC abstracts the access to the I/O \ref APIC + */ + +#pragma once +#include "../types.h" +#include "core_interrupt.h" +#include "ioapic_registers.h" + +/*! \brief Abstraction of the I/O APIC that is used for management of external + * interrupts. + * \ingroup interrupts + * + * The I/O APIC's Core component is the IO-redirection table. 
This table is + * used to configure a flexible mapping between the interrupt number and the + * external interruption. Entries within this table have a width of 64 bit. For + * convenience, the union \ref IOAPIC::RedirectionTableEntry should be used for + * modifying these tables (see file `ioapic_registers.h` for details). + */ + +namespace IOAPIC { +/*! \brief Initializes the I/O APIC. + * + * This function will initialize the I/O APIC by initializing the + * IO-redirection table with sane default values. The default interrupt-vector + * number is chosen such that, in case the interrupt is issued, the panic + * handler is executed. In the beginning, all external interrupts are disabled + * within the I/O APIC. Apart from the redirection table, the `APICID` (read + * from the system description tables during boot) needs to be written to the + * `IOAPICID` register (see \ref APIC::getIOAPICID() ) + * + * \todo(12) Implement Function + */ +void init(); + +/*! \brief Creates a mapping between an interrupt vector and an external + interrupt. + * + * \param slot Number of the slot (i.e., the external interrupt) to + configure. + * \param vector Number of the interrupt vector that will be issued for + the external interrupt. + * \param trigger_mode Edge or level triggered interrupt signaling + (level-triggered interrupts required for the optional serial interface) + * \param polarity Polarity of the interrupt signaling (active high or + active low) + * + * \todo(12) Implement Function + */ +void config(uint8_t slot, Core::Interrupt::Vector vector, + TriggerMode trigger_mode = TriggerMode::EDGE, + Polarity polarity = Polarity::HIGH); + +/*! \brief Enables the redirection of particular external interrupts to the + * CPU(s). + * + * To fully enable interrupt handling, the interrupts must be enabled for every + * CPU (e.g., by calling + * \ref Core::Interrupt::enable() in main). + * \todo(12) Do that somewhere appropriate. 
+ * + * \param slot Number of the external interrupt that should be enabled. + * + * \todo(12) Implement Function + */ +void allow(uint8_t slot); + +/*! \brief Selectively masks external interrupts by slot number. + * \param slot Slot number of the interrupt to be disabled. + * + * \todo(12) Implement Function + */ +void forbid(uint8_t slot); + +/*! \brief Check whether an external interrupt source is masked. + * \param slot Slot number of the interrupt to be checked. + * \return Returns `true` iff the interrupt is unmasked, `false` otherwise + * + * \todo(12) Implement Function + */ +bool status(uint8_t slot); +} // namespace IOAPIC diff --git a/arch/ioapic_registers.h b/arch/ioapic_registers.h new file mode 100644 index 0000000..7e7937d --- /dev/null +++ b/arch/ioapic_registers.h @@ -0,0 +1,227 @@ +/*! \file + * \brief Helper structures for interacting with the \ref IOAPIC "I/O APIC". + */ + +#pragma once +#include "../types.h" + +namespace IOAPIC { +typedef uint32_t Index; +typedef uint32_t Register; + +extern volatile Index *IOREGSEL_REG; +extern volatile Register *IOWIN_REG; + +/*! \brief I/O APIC Identification + * + * The IOAPICID register is register number 0x0. The I/O APIC's ID will be read + * from the system configuration tables (provided by the BIOS) during boot. The + * number can be queried by calling \ref APIC::getIOAPICID(). During + * initialization, this number must be written to the IOAPICID register. + * + * \see [IO-APIC manual](intel_ioapic.pdf#page=9), page 9 + */ +union Identification { + struct { + uint32_t : 24, ///< Reserved, do not modify + id : 4, ///< I/O APIC Identification + : 4; ///< Reserved, do not modify + }; + Register value; + explicit Identification(Register value) : value(value) {} +} __attribute__((packed)); +static_assert(sizeof(Identification) == 4, + "IOAPIC Identification has wrong size"); + +/*! \brief Delivery mode specifies the type of interrupt sent to the CPU. 
*/ +enum DeliveryMode { + FIXED = 0, ///< "ordinary" interrupt; send to ALL cores listed in the + ///< destination bit mask + LOWEST_PRIORITY = 1, ///< "ordinary" interrupt; send to the lowest priority + ///< core from destination mask + SMI = 2, ///< System Management Interrupt; vector number required to be 0 + // Reserved + NMI = 4, ///< Non-Maskable Interrupt, vector number ignored, only edge + ///< triggered + INIT = 5, ///< Initialization interrupt (always treated as edge triggered) + // Reserved + EXTERN_INT = 7 ///< external interrupt (only edge triggered) +}; + +/*! \brief Way of interpreting the value written to the destination field. */ +enum DestinationMode { + PHYSICAL = 0, ///< Destination contains the physical destination APIC ID + LOGICAL = 1 ///< Destination contains a mask of logical APIC IDs +}; + +/*! \brief Interrupt polarity for the redirection-table entry */ +enum Polarity { + HIGH = 0, ///< active high + LOW = 1 ///< active low +}; + +/*! \brief Trigger mode */ +enum TriggerMode { + EDGE = 0, ///< edge triggered + LEVEL = 1 ///< level triggered +}; + +/*! \brief Interrupt state */ +enum DeliveryStatus { + IDLE = 0, ///< No activity for this interrupt + SEND_PENDING = + 1 ///< Interrupt will be sent as soon as the bus / LAPIC is ready +}; + +/*! \brief Interrupt masking */ +enum InterruptMask { + UNMASKED = 0, ///< Redirection-table entry is active (non-masked) + MASKED = 1 ///< Redirection-table entry is inactive (masked) +}; + +/*! \brief Entry in the redirection table. + * + * The redirection table begins with I/O APIC register `0x10` and ends at + * `0x3f`. + * + * Each entry has a size of 64 bit, equaling two I/O APIC registers. + * For instance, entry 0 is stored in registers `0x10` and `0x11`, in which the + * low-order 32 bit (equals \ref value_low) and high-order 32 bit (equals \ref + * value_high) need to be stored. 
+ * + * The union defined below provides an overlay allowing convenient modification + * of individual bits, while the 32-bit values \ref value_low and \ref + * value_high can be used for writing to the I/O APIC registers. + * + * \note [Type punning](https://en.wikipedia.org/wiki/Type_punning#Use_of_union) + * is indeed undefined behavior in C++. However, *gcc* explicitly allows + * this construct as a [language extension](https://gcc.gnu.org/bugs/#nonbugs). + * Some compilers ([other than + * gcc](https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#Type%2Dpunning) + * might allow this feature only by disabling strict aliasing + * (`-fno-strict-aliasing`). In \StuBS we use this feature extensively due to + * the improved code readability. + * + * \see [IO-APIC manual](intel_ioapic.pdf#page=11), page 11-13 + */ +union RedirectionTableEntry { + // @cond ANONYMOUS_STRUCT + struct { + // @endcond + + /*! \brief Interrupt vector in the \ref IDT "Interrupt Descriptor Table + * (IDT)" will be activated when the corresponding external interrupt + * triggers. + */ + uint64_t vector : 8; + + /*! \brief The delivery mode denotes the way the interrupts will be + * delivered to the local CPU cores, respectively to their local APICs. + * + * For StuBS, we use \ref LOWEST_PRIORITY, as all CPU cores have the same + * priority and we want to distribute interrupts evenly among them. + * It, however, is not guaranteed that this method of load balancing will + * work on every system. + */ + DeliveryMode delivery_mode : 3; + + /*! \brief The destination mode defines how the value stored in \ref + * destination will be interpreted. + * + * For StuBS, we use \ref LOGICAL + */ + DestinationMode destination_mode : 1; + + /*! \brief Delivery status holds the current status of interrupt delivery. + * + * \note This field is read only; write accesses to this field will be + * ignored. + */ + DeliveryStatus delivery_status : 1; + + /*! 
\brief The polarity denotes when an interrupt should be issued. + * + * For StuBS, we usually use \ref HIGH (i.e., when the interrupt line is, + * logically, `1`). + */ + Polarity polarity : 1; + + /*! \brief The remote IRR bit indicates whether the local APIC(s) accept the + * level interrupt. + * + * Once the LAPIC sends an \ref LAPIC::endOfInterrupt "End Of Interrupt + * (EOI)", this bit is reset to `0`. + * + * \note This field is read only and is only meaningful for level-triggered + * interrupts. + */ + uint64_t remote_irr : 1; + + /*! \brief The trigger mode states whether the interrupt signaling is level + * or edge triggered. + * + * StuBS uses \ref EDGE for the Timer, the Keyboard and (optional) serial + * interface need \ref LEVEL + */ + TriggerMode trigger_mode : 1; + + /*! \brief Mask or unmask interrupts for a particular, external source. + * + * The interrupt mask denotes whether interrupts should be + * accepted/unmasked (value \ref UNMASKED) or ignored/masked (value \ref + * MASKED). + */ + InterruptMask interrupt_mask : 1; + + /*! \brief Reserved, do not modify. */ + uint64_t : 39; + + /*! \brief Interrupt destination. + * + * The meaning of destination depends on the destination mode: + * For the logical destination mode, destination holds a bit mask made up + * of the cores that are candidates for receiving the interrupt. In the + * single-core case, this value is `1`, in the multi-core case, the `n` + * low-order bits need to be set (with `n` being the number of CPU cores, + * see \ref Core::count() ). Setting the `n` low-order bits marks all + * available cores as candidates for receiving interrupts and thereby + * balancing the number of interrupts between the cores. + * + * \note This form of load balancing depends on the hardware's behavior and + * may not work on all systems in the same fashion. Most notably, in QEMU + * all interrupts are sent to the BSP (core 0). 
+ */ + uint64_t destination : 8; + + // @cond ANONYMOUS_STRUCT + } __attribute__((packed)); + // @endcond + + // @cond ANONYMOUS_STRUCT + struct { + // @endcond + + Register value_low; ///< Low-order 32 bits (for the register with the + ///< smaller index) + Register value_high; ///< High-order 32 bits (for the register with the + ///< higher index) + // @cond ANONYMOUS_STRUCT + } __attribute__((packed)); + // @endcond + + /*! \brief Constructor for a redirection-table entry + * + * Every entry in the redirection table represents an external source of + * interrupts and has a size of 64 bits. Due to the I/O APIC registers being + * only 32 bits wide, the constructor takes two 32 bit values. + * + * \param value_low First, low-order 32 bit value + * \param value_high Second, high-order 32 bit value + */ + RedirectionTableEntry(Register value_low, Register value_high) + : value_low(value_low), value_high(value_high) {} +}; + +static_assert(sizeof(RedirectionTableEntry) == 8, + "IOAPIC::RedirectionTableEntry has wrong size"); +} // namespace IOAPIC diff --git a/arch/ioport.h b/arch/ioport.h new file mode 100644 index 0000000..3f99263 --- /dev/null +++ b/arch/ioport.h @@ -0,0 +1,63 @@ +/*! \file + * \brief \ref IOPort provides access to the x86 IO address space + */ + +#pragma once +#include "../types.h" + +/*! \brief Abstracts access to the I/O address space + * + * x86 PCs have a separated I/O address space that is accessible only via the + * machine instructions `in` and `out`. An IOPort object encapsulates the + * corresponding address in the I/O address space and can be used for byte or + * word-wise reading or writing. + */ + +class IOPort { + /*! \brief Address in I/O address space + * + */ + uint16_t address; + + public: + /*! \brief Constructor + * \param addr Address from the I/O address space + */ + explicit constexpr IOPort(uint16_t addr) : address(addr) {} + + /*! 
\brief Write one byte to the I/O port + * \param val The value to be written + */ + void outb(uint8_t val) const { + asm volatile("out %%al, %%dx\n\t" : : "a"(val), "d"(address) :); + } + + /*! \brief Write one word (2 bytes) to the I/O port + * \param val The value to be written + */ + void outw(uint16_t val) const { + asm volatile("out %%ax, %%dx\n\t" : : "a"(val), "d"(address) :); + } + + /*! \brief Read one byte from the I/O port + * \return Read byte + */ + uint8_t inb() const { + uint8_t out = 0; + + asm volatile("in %%dx, %%al\n\t" : "=a"(out) : "d"(address) :); + + return out; + } + + /*! \brief Read one word (2 bytes) from the I/O port + * \return Read word (2 bytes) + */ + uint16_t inw() const { + uint16_t out = 0; + + asm volatile("inw %%dx, %%ax\n\t" : "=a"(out) : "d"(address) :); + + return out; + } +}; diff --git a/arch/lapic.cc b/arch/lapic.cc new file mode 100644 index 0000000..f723b29 --- /dev/null +++ b/arch/lapic.cc @@ -0,0 +1,190 @@ +#include "lapic.h" + +#include "lapic_registers.h" + +namespace LAPIC { + +/*! \brief Base Address + * used with offset to access memory mapped registers + */ +volatile uintptr_t base_address = 0xfee00000; + +// Read the memory-mapped LAPIC register located at base_address + idx +Register read(Index idx) { + return *reinterpret_cast<volatile Register *>(base_address + idx); +} +// Write value to the memory-mapped LAPIC register located at base_address + idx +void write(Index idx, Register value) { + *reinterpret_cast<volatile Register *>(base_address + idx) = value; +} + +/*! \brief Local APIC ID (for Pentium 4 and newer) + * + * Is assigned automatically during boot and should not be changed. + * + * \see [ISDMv3, 10.4.6 Local APIC ID](intel_manual_vol3.pdf#page=371) + */ +union IdentificationRegister { + struct { + uint32_t : 24, ///< (reserved) + apic_id : 8; ///< APIC ID + }; + Register value; + + IdentificationRegister() : value(read(Index::IDENTIFICATION)) {} +} __attribute__((packed)); + +/*! 
\brief Local APIC Version + * + * \see [ISDMv3 10.4.8 Local APIC Version + * Register](intel_manual_vol3.pdf#page=373) + */ +union VersionRegister { + struct { + uint32_t + version : 8, ///< 0x14 for P4 and Xeon, 0x15 for more recent hardware + : 8, ///< (reserved) + max_lvt_entry : 8, ///< Maximum number of local vector entries + suppress_eoi_broadcast : 1, ///< Support for suppressing EOI broadcasts + : 7; ///< (reserved) + }; + Register value; + + VersionRegister() : value(read(Index::VERSION)) {} +} __attribute__((packed)); + +/*! \brief Logical Destination Register + * \see [ISDMv3 10.6.2.2 Logical Destination + * Mode](intel_manual_vol3.pdf#page=385) + */ +union LogicalDestinationRegister { + struct { + uint32_t : 24, ///< (reserved) + lapic_id : 8; ///< Logical APIC ID + }; + Register value; + + LogicalDestinationRegister() : value(read(Index::LOGICAL_DESTINATION)) {} + ~LogicalDestinationRegister() { write(Index::LOGICAL_DESTINATION, value); } +} __attribute__((packed)); + +enum Model { CLUSTER = 0x0, FLAT = 0xf }; + +/*! \brief Destination Format Register + * + * \see [ISDMv3 10.6.2.2 Logical Destination + * Mode](intel_manual_vol3.pdf#page=385) + */ +union DestinationFormatRegister { + struct { + uint32_t : 28; ///< (reserved) + Model model : 4; ///< Model (Flat vs. Cluster) + }; + Register value; + DestinationFormatRegister() : value(read(Index::DESTINATION_FORMAT)) {} + ~DestinationFormatRegister() { write(Index::DESTINATION_FORMAT, value); } +} __attribute__((packed)); + +/*! \brief Task Priority Register + * + * \see [ISDMv3 10.8.3.1 Task and Processor + * Priorities](intel_manual_vol3.pdf#page=391) + */ +union TaskPriorityRegister { + struct { + uint32_t task_prio_sub : 4, ///< Task Priority Sub-Class + task_prio : 4, ///< Task Priority + : 24; ///< (reserved) + }; + Register value; + TaskPriorityRegister() : value(read(Index::TASK_PRIORITY)) {} + ~TaskPriorityRegister() { write(Index::TASK_PRIORITY, value); } +} __attribute__((packed)); + +/*! 
\brief APIC Software Status for Spurious Interrupt Vector */ +enum APICSoftware { + APIC_DISABLED = 0, + APIC_ENABLED = 1, +}; + +/*! \brief Focus Processor Checking for Spurious Interrupt Vector */ +enum FocusProcessorChecking { + CHECKING_ENABLED = 0, + CHECKING_DISABLED = 1, +}; + +/*! \brief Suppress End-Of-Interrupt-Broadcast for Spurious Interrupt Vector */ +enum SuppressEOIBroadcast { + BROADCAST = 0, + SUPPRESS_BROADCAST = 1, +}; + +/*! \brief Spurious Interrupt Vector Register + * + * \see [ISDMv3 10.9 Spurious Interrupt](intel_manual_vol3.pdf#page=394) + */ +union SpuriousInterruptVectorRegister { + struct { + uint32_t spurious_vector : 8; ///< Spurious Vector + APICSoftware apic_software : 1; ///< APIC Software Enable/Disable + FocusProcessorChecking + focus_processor_checking : 1; ///< Focus Processor Checking + uint32_t reserved_1 : 2; + SuppressEOIBroadcast eoi_broadcast_suppression : 1; + uint32_t reserved : 19; + }; + Register value; + + SpuriousInterruptVectorRegister() + : value(read(Index::SPURIOUS_INTERRUPT_VECTOR)) {} + ~SpuriousInterruptVectorRegister() { + write(Index::SPURIOUS_INTERRUPT_VECTOR, value); + } +} __attribute__((packed)); +static_assert(sizeof(SpuriousInterruptVectorRegister) == 4, + "LAPIC Spurious Interrupt Vector has wrong size"); + +uint8_t getID() { + IdentificationRegister ir; + return ir.apic_id; +} + +uint8_t getLogicalID() { + LogicalDestinationRegister ldr; + return ldr.lapic_id; +} + +uint8_t getVersion() { + VersionRegister vr; + return vr.version; +} + +void init(uint8_t logical_id) { + // reset logical destination ID + // can be set using setLogicalLAPICID() + LogicalDestinationRegister ldr; + ldr.lapic_id = logical_id; + + // set task priority to 0 -> accept all interrupts + TaskPriorityRegister tpr; + tpr.task_prio = 0; + tpr.task_prio_sub = 0; + + // set flat delivery mode + DestinationFormatRegister dfr; + dfr.model = Model::FLAT; + + // use 255 as spurious vector, enable APIC and disable focus processor + 
SpuriousInterruptVectorRegister sivr; + sivr.spurious_vector = 0xff; + sivr.apic_software = APICSoftware::APIC_ENABLED; + sivr.focus_processor_checking = FocusProcessorChecking::CHECKING_DISABLED; +} + +void endOfInterrupt() { + // dummy read + read(SPURIOUS_INTERRUPT_VECTOR); + + // signal end of interrupt + write(EOI, 0); +} + +} // namespace LAPIC diff --git a/arch/lapic.h b/arch/lapic.h new file mode 100644 index 0000000..c2c82f2 --- /dev/null +++ b/arch/lapic.h @@ -0,0 +1,199 @@ +/*! \file + * \brief \ref LAPIC abstracts access to the Local \ref APIC + */ + +#pragma once +#include "../types.h" + +/*! \brief Abstracts the local APIC (which is integrated into every CPU core) + * \ingroup interrupts + * + * In modern (x86) PCs, every CPU core has its own Local APIC (LAPIC). The + * LAPIC is the link between the local CPU core and the I/O APIC (that takes + * care of external interrupt sources). Interrupt messages received by the + * LAPIC will be passed to the corresponding CPU core and trigger the interrupt + * handler on this core. + * + * \see [ISDMv3 10.4 Local APIC](intel_manual_vol3.pdf#page=366) + */ +namespace LAPIC { +/*! \brief Initializes the local APIC of the calling CPU core and sets the + * logical LAPIC ID in the LDR register + * \param logical_id Logical APIC ID to be set + */ +void init(uint8_t logical_id); + +/*! \brief Signal EOI (End of interrupt) + * + * Signals to the LAPIC that the handling of the current interrupt finished. + * This function must be called at the end of interrupt handling before executing `iret`. + */ +void endOfInterrupt(); + +/*! \brief Get the ID of the current core's LAPIC + * \return LAPIC ID + */ +uint8_t getID(); + +/*! \brief Get the Logical ID of the current core's LAPIC + * \return Logical ID + */ +uint8_t getLogicalID(); + +/*! \brief Set the Logical ID of the current core's LAPIC + * \param id new Logical ID + */ +void setLogicalID(uint8_t id); + +/*! 
\brief Get version number of local APIC + * \return version number + */ +uint8_t getVersion(); + +/*! \brief Inter-Processor Interrupts + * + * For multi-core systems, the LAPIC enables sending messages (Inter-Processor + * Interrupts, IPIs) to other CPU cores and receiving those sent from other + * cores. + * + * \see [ISDMv3 10.6 Issuing Interprocessor + * Interrupts](intel_manual_vol3.pdf#page=380) + */ +namespace IPI { + +/*! \brief Check if the previously sent IPI has reached its destination. + * + * \return `true` if the previous IPI was accepted from its target processor, + * otherwise `false` + */ +bool isDelivered(); + +/*! \brief Send an Inter-Processor Interrupt (IPI) + * \param destination ID of the target processor (use APIC::getLAPICID(core) ) + * \param vector Interrupt vector number to be triggered + */ +void send(uint8_t destination, uint8_t vector); + +/*! \brief Send an Inter-Processor Interrupt (IPI) to a group of processors + * \param logical_destination Mask containing the logical APIC IDs of the target + * processors (use APIC::getLogicalLAPICID()) + * \param vector Interrupt vector number to be triggered + */ +void sendGroup(uint8_t logical_destination, uint8_t vector); + +/*! \brief Send an Inter-Processor Interrupt (IPI) to all processors (including + * self) + * \param vector Interrupt vector number to be triggered + */ +void sendAll(uint8_t vector); + +/*! \brief Send an Inter-Processor Interrupt (IPI) to all other processors (all + * but self) + * \param vector Interrupt vector number to be triggered + */ +void sendOthers(uint8_t vector); + +/*! \brief Send an INIT request IPI to all other processors + * + * \note Only required for startup + * + * \param assert if `true` send an INIT, + * on `false` send an INIT Level De-assert + */ +void sendInit(bool assert = true); + +/*! 
\brief Send a Startup IPI to all other processors + * + * \note Only required for startup + * + * \param vector Pointer to a startup routine + */ +void sendStartup(uint8_t vector); + +} // namespace IPI + +/*! \brief Local Timer (for each LAPIC / CPU) + * + * \see [ISDMv3 10.5.4 APIC Timer](intel_manual_vol3.pdf#page=378) + */ +namespace Timer { + +/*! \brief Determines the \ref LAPIC::Timer frequency. + * + * This function will calculate the number of LAPIC-timer ticks passing in the + * course of one millisecond. To do so, this function will rely on PIT timer + * functionality and measure the tick delta between start and end of waiting for + * a predefined period. + * + * For measurement, the LAPIC-timer single-shot mode (without interrupts) is + * used; after measurement, the timer is disabled again. + * + * \note The timer is counting towards zero. + * + * \return Number of LAPIC-timer ticks per millisecond + * + * \todo(15) Implement Method + */ +uint32_t ticks(void); + +/*! \brief Set the \ref LAPIC::Timer. + * \param counter Initial counter value; decremented on every LAPIC timer tick + * \param divide Divider (power of 2, i.e., 1, 2, 4, ..., 128) used as + * prescaler between bus frequency and LAPIC timer frequency: `LAPIC timer + * frequency = bus frequency / divide`. `divide` is a numerical parameter, the + * conversion to the corresponding bit mask is done internally by calling + * getClockDiv(). + * \param vector Interrupt vector number to be triggered on counter expiry + * \param periodic If set, the interrupt will be issued periodically + * \param masked If set, interrupts on counter expiry are suppressed + * + * \todo(15) Implement Method + */ +void set(uint32_t counter, uint8_t divide, uint8_t vector, bool periodic, + bool masked = false); + +/*! \brief Setup the \ref LAPIC::Timer. + * + * Initializes the \ref LAPIC::Timer + * in such a way that regular interrupts are triggered approx. 
every `us` + * microseconds when \ref LAPIC::Timer::activate() is called. + * For this purpose, a suitable timer divisor is determined + * based on the timer frequency determined with \ref LAPIC::Timer::ticks(). + * This timer divisor has to be as small as possible, but large enough to + * prevent the 32-bit counter from overflowing. + * + * \param us Desired interrupt interval in microseconds. + * \return Indicates if the interval could be set. + * + * \todo(15) Implement Method + */ +bool setup(uint32_t us); + +/*! \brief Retrieve the interrupt interval set during \ref LAPIC::Timer::setup() + * + * \return Interval in microseconds + * + * \todo(15) Implement method + */ +uint32_t interval(); + +/*! \brief Activate the timer on this core. + * + * The core local timer starts with the interval previously configured in + * \ref LAPIC::Timer::setup(). To get timer interrupts on all cores, this method + * must be called once per core (however, it is sufficient to call \ref + * LAPIC::Timer::setup() only once since the APIC-Bus frequency is the same on + * each core). + * + * \todo(15) Implement method + */ +void activate(); + +/*! \brief Set the LAPIC-timer interrupt mask + * \param masked If set, interrupts are suppressed on counter expiry. + * + * \todo(16) Implement for tick-less kernel + */ +void setMasked(bool masked); +} // namespace Timer +} // namespace LAPIC diff --git a/arch/lapic_ipi.cc b/arch/lapic_ipi.cc new file mode 100644 index 0000000..289d2be --- /dev/null +++ b/arch/lapic_ipi.cc @@ -0,0 +1,244 @@ +#include "lapic_registers.h" + +namespace LAPIC { +namespace IPI { + +/*! \brief Delivery mode specifies the type of interrupt sent to the CPU. 
*/ +enum DeliveryMode { + FIXED = 0, ///< "ordinary" interrupt; send to ALL cores listed in the + ///< destination bit mask + LOWEST_PRIORITY = 1, ///< "ordinary" interrupt; send to the lowest priority + ///< core from destination mask + SMI = 2, ///< System Management Interrupt; vector number required to be 0 + // Reserved + NMI = 4, ///< Non-Maskable Interrupt, vector number ignored, only edge + ///< triggered + INIT = 5, ///< Initialization interrupt (always treated as edge triggered) + INIT_LEVEL_DEASSERT = 5, ///< Synchronization interrupt + STARTUP = 6, ///< Dedicated Startup-Interrupt (SIPI) + // Reserved +}; + +/*! \brief Way of interpreting the value written to the destination field. */ +enum DestinationMode { + PHYSICAL = 0, ///< Destination contains the physical destination APIC ID + LOGICAL = 1 ///< Destination contains a mask of logical APIC IDs +}; + +/*! \brief Interrupt state */ +enum DeliveryStatus { + IDLE = 0, ///< No activity for this interrupt + SEND_PENDING = + 1 ///< Interrupt will be sent as soon as the bus / LAPIC is ready +}; + +/*! \brief Interrupt level */ +enum Level { + DEASSERT = 0, ///< Must be zero when DeliveryMode::INIT_LEVEL_DEASSERT + ASSERT = 1 ///< Must be one for all other delivery modes +}; + +/*! \brief Trigger mode for DeliveryMode::INIT_LEVEL_DEASSERT */ +enum TriggerMode { + EDGE_TRIGGERED = 0, ///< edge triggered + LEVEL_TRIGGERED = 1 ///< level triggered +}; + +/*! \brief Shorthand for commonly used destinations */ +enum DestinationShorthand { + NO_SHORTHAND = 0, ///< Use destination field instead of shorthand + SELF = 1, ///< Send IPI to self + ALL_INCLUDING_SELF = 2, ///< Send IPI to all including self + ALL_EXCLUDING_SELF = 3 ///< Send IPI to all except self +}; + +/*! \brief Interrupt mask */ +enum InterruptMask { + UNMASKED = 0, ///< Interrupt entry is active (non-masked) + MASKED = 1 ///< Interrupt entry is deactivated (masked) +}; + +/*! 
\brief Interrupt Command + * + * \see [ISDMv3 10.6.1 Interrupt Command Register + * (ICR)](intel_manual_vol3.pdf#page=381) + */ +union InterruptCommand { + struct { + /*! \brief Interrupt vector in the \ref IDT "Interrupt Descriptor Table + * (IDT)" that will be activated on the receiving core(s) when the + * inter-processor interrupt is delivered. + * + * Set from the `vector` argument of the constructors below. + */ + uint64_t vector : 8; + + /*! \brief The delivery mode denotes the way the interrupts will be + * delivered to the local CPU cores, respectively to their local APICs. + * + * The constructors below default to `DeliveryMode::FIXED` (an "ordinary" + * interrupt sent to all cores selected by the destination); the INIT and + * STARTUP modes are passed explicitly by sendInit() and sendStartup(), + * which are only required while starting the other cores. + */ + enum DeliveryMode delivery_mode : 3; + + /*! \brief The destination mode defines how the value stored in + * `destination` will be interpreted. + * + * Defaults to `DestinationMode::PHYSICAL`; sendGroup() passes `LOGICAL`. + */ + enum DestinationMode destination_mode : 1; + + /*! \brief Delivery status holds the current status of interrupt delivery. + * + * \note This field is read only; write accesses to this field will be + * ignored. + */ + enum DeliveryStatus delivery_status : 1; + + uint64_t : 1; ///< reserved + + /*! \brief Level of the interrupt command (assert / de-assert). + * + * Must be `Level::DEASSERT` for `DeliveryMode::INIT_LEVEL_DEASSERT` and + * `Level::ASSERT` for all other delivery modes. + */ + enum Level level : 1; + + /*! \brief The trigger mode states whether the interrupt signaling is level + * or edge triggered. + * + * IPIs default to `TriggerMode::EDGE_TRIGGERED`; only the INIT level + * de-assert issued by sendInit(false) uses `LEVEL_TRIGGERED`. 
+ */ + enum TriggerMode trigger_mode : 1; + + uint64_t : 2; ///< reserved + + enum DestinationShorthand destination_shorthand : 2; + + uint64_t : 36; ///< Reserved, do not modify + + /*! \brief Interrupt destination. + * + * The meaning of destination depends on the destination mode: + * For the logical destination mode, destination holds a bit mask made up + * of the cores that are candidates for receiving the interrupt. In the + * single-core case, this value is `1`, in the multi-core case, the `n` + * low-order bits need to be set (with `n` being the number of CPU cores, + * see \ref Core::count() ). Setting the `n` low-order bits marks all + * available cores as candidates for receiving interrupts and thereby + * balancing the number of interrupts between the cores. + * + * \note This form of load balancing depends on the hardware's behavior and + * may not work on all systems in the same fashion. Most notably, in QEMU + * all interrupts are sent to the BSP (core 0). + */ + uint64_t destination : 8; + } __attribute__((packed)); + + /*! \brief Raw view of the Interrupt Command Register + * + * The command is 64 bits wide, but the LAPIC registers are accessed as + * 32-bit values (see \ref Register), so it is split into two halves that + * are transferred separately by send(), isSendPending() and readRegister(). + */ + struct { + Register value_low; ///< First, low-order register + Register value_high; ///< Second, high-order register + } __attribute__((packed)); + + /*! 
\brief Default constructor (does not read the ICR, both halves stay uninitialized) */ + InterruptCommand() = default; + + explicit InterruptCommand( + uint8_t destination, uint8_t vector = 0, + DestinationMode destination_mode = DestinationMode::PHYSICAL, + DeliveryMode delivery_mode = DeliveryMode::FIXED, + TriggerMode trigger_mode = TriggerMode::EDGE_TRIGGERED, + Level level = Level::ASSERT) { + readRegister(); + this->vector = vector; + this->delivery_mode = delivery_mode; + this->destination_mode = destination_mode; + this->level = level; + this->trigger_mode = trigger_mode; + this->destination_shorthand = DestinationShorthand::NO_SHORTHAND; + this->destination = destination; + } + + InterruptCommand(DestinationShorthand destination_shorthand, uint8_t vector, + DeliveryMode delivery_mode = DeliveryMode::FIXED, + TriggerMode trigger_mode = TriggerMode::EDGE_TRIGGERED, + Level level = Level::ASSERT) { + readRegister(); + this->vector = vector; + this->delivery_mode = delivery_mode; + this->level = level; + this->trigger_mode = trigger_mode; + this->destination_shorthand = destination_shorthand; + // no explicit destination here: the field keeps the value read by readRegister() + } + + void send() const { + write(INTERRUPT_COMMAND_REGISTER_HIGH, value_high); + write(INTERRUPT_COMMAND_REGISTER_LOW, value_low); // NOTE(review): low half last, presumably this write triggers the IPI + } + + bool isSendPending() { + value_low = read(INTERRUPT_COMMAND_REGISTER_LOW); + return delivery_status == DeliveryStatus::SEND_PENDING; + } + + private: + void readRegister() { + while (isSendPending()) { // busy-wait while the delivery status reports SEND_PENDING + } + value_high = read(INTERRUPT_COMMAND_REGISTER_HIGH); + } +}; +static_assert(sizeof(InterruptCommand) == 8, + "LAPIC Interrupt Command has wrong size"); + +bool isDelivered() { + InterruptCommand ic; + return !ic.isSendPending(); +} + +void send(uint8_t destination, uint8_t vector) { + InterruptCommand ic(destination, vector); + 
ic.send(); +} + +void sendGroup(uint8_t logical_destination, uint8_t vector) { + InterruptCommand ic(logical_destination, vector, DestinationMode::LOGICAL); + ic.send(); +} + +void sendAll(uint8_t vector) { + InterruptCommand 
ic(DestinationShorthand::ALL_INCLUDING_SELF, vector); + ic.send(); +} + +void sendOthers(uint8_t vector) { + InterruptCommand ic(DestinationShorthand::ALL_EXCLUDING_SELF, vector); + ic.send(); +} + +void sendInit(bool assert) { + LAPIC::IPI::InterruptCommand ic( + DestinationShorthand::ALL_EXCLUDING_SELF, 0, DeliveryMode::INIT, + assert ? TriggerMode::EDGE_TRIGGERED : TriggerMode::LEVEL_TRIGGERED, + assert ? Level::ASSERT : Level::DEASSERT); + ic.send(); +} + +void sendStartup(uint8_t vector) { + InterruptCommand ic(DestinationShorthand::ALL_EXCLUDING_SELF, vector, + DeliveryMode::STARTUP); + ic.send(); +} + +} // namespace IPI +} // namespace LAPIC diff --git a/arch/lapic_registers.h b/arch/lapic_registers.h new file mode 100644 index 0000000..d71b5d7 --- /dev/null +++ b/arch/lapic_registers.h @@ -0,0 +1,54 @@ +/*! \file + * \brief Structures and macros for accessing \ref LAPIC "the local APIC". + */ + +#pragma once +#include "../types.h" + +namespace LAPIC { +// Memory Mapped Base Address +extern volatile uintptr_t base_address; + +typedef uint32_t Register; + +/*! \brief Register Offset Index + * + * \see [ISDMv3 10.4.1 The Local APIC Block + * Diagram](intel_manual_vol3.pdf#page=368) + */ +enum Index : uint16_t { + IDENTIFICATION = + 0x020, ///< Local APIC ID Register, RO (sometimes R/W). Do not change! 
+ VERSION = 0x030, ///< Local APIC Version Register, RO + TASK_PRIORITY = 0x080, ///< Task Priority Register, R/W + EOI = 0x0b0, ///< EOI Register, WO + LOGICAL_DESTINATION = 0x0d0, ///< Logical Destination Register, R/W + DESTINATION_FORMAT = + 0x0e0, ///< Destination Format Register, bits 0-27 RO, bits 28-31 R/W + SPURIOUS_INTERRUPT_VECTOR = 0x0f0, ///< Spurious Interrupt Vector Register, + ///< bits 0-8 R/W, bits 9-1 R/W + INTERRUPT_COMMAND_REGISTER_LOW = + 0x300, ///< Interrupt Command Register 1, R/W + INTERRUPT_COMMAND_REGISTER_HIGH = + 0x310, ///< Interrupt Command Register 2, R/W + TIMER_CONTROL = 0x320, ///< LAPIC timer control register, R/W + TIMER_INITIAL_COUNTER = 0x380, ///< LAPIC timer initial counter register, R/W + TIMER_CURRENT_COUNTER = 0x390, ///< LAPIC timer current counter register, RO + TIMER_DIVIDE_CONFIGURATION = + 0x3e0 ///< LAPIC timer divide configuration register, RW +}; + +/*! \brief Get value from APIC register + * + * \param idx Register Offset Index + * \return current value of register + */ +Register read(Index idx); + +/*! \brief Write value to APIC register + * + * \param idx Register Offset Index + * \param value value to be written into register + */ +void write(Index idx, Register value); +} // namespace LAPIC diff --git a/arch/lapic_timer.cc b/arch/lapic_timer.cc new file mode 100644 index 0000000..2597cd1 --- /dev/null +++ b/arch/lapic_timer.cc @@ -0,0 +1,91 @@ +#include "lapic.h" +#include "lapic_registers.h" + +namespace LAPIC { +namespace Timer { + +/*! \brief Timer Delivery Status */ +enum DeliveryStatus { IDLE = 0, SEND_PENDING = 1 }; + +/*! \brief Timer Mode */ +enum TimerMode { + ONE_SHOT = 0, + PERIODIC = 1, + DEADLINE = 2 + // reserved +}; + +/*! \brief Timer Mask */ +enum Mask { NOT_MASKED = 0, MASKED = 1 }; + +static const Register INVALID_DIV = 0xff; + +/*! 
\brief LAPIC-Timer Control Register + * + * \see [ISDMv3 10.5.1 Local Vector Table](intel_manual_vol3.pdf#page=375) + */ +union ControlRegister { + struct { + uint32_t vector : 8; ///< Vector + uint32_t : 4; + DeliveryStatus delivery_status : 1; ///< Delivery Status (readonly) + uint32_t : 3; + Mask masked : 1; ///< Interrupt Mask (if set, interrupt will not trigger) + TimerMode timer_mode : 2; ///< Timer Mode + uint32_t : 13; + }; + Register value; +} __attribute__((packed)); + +/*! \brief LAPIC timer divider table + * + * \see [ISDMv3 10.5.4 APIC Timer](intel_manual_vol3.pdf#page=378) + */ +static const Register div_masks[] = { + 0xb, ///< divides by 1 + 0x0, ///< divides by 2 + 0x1, ///< divides by 4 + 0x2, ///< divides by 8 + 0x3, ///< divides by 16 + 0x8, ///< divides by 32 + 0x9, ///< divides by 64 + 0xa ///< divides by 128 +}; + +/*! \brief Calculate the bit mask for the LAPIC-timer divider. + * \param div Divider, must be power of two: 1, 2, 4, 8, 16, 32, 64, 128 + * \return Bit mask for LAPIC::setTimer() or `0xff` if `div` is invalid. 
+ */ +Register getClockDiv(uint8_t div) { + (void)div; + return 0; +} + +uint32_t ticks(void) { + uint32_t ticks = 0; // ticks per millisecond + // Calculation (Assignment 5) + return ticks; +} + +void set(uint32_t counter, uint8_t divide, uint8_t vector, bool periodic, + bool masked) { + (void)counter; + (void)divide; + (void)vector; + (void)periodic; + (void)masked; +} + +bool setup(uint32_t us) { + (void)us; + return false; +} + +uint32_t interval() { return 0; } + +void activate() {} + +void setMasked(bool masked) { (void)masked; } + +} // namespace Timer +} // namespace LAPIC diff --git a/arch/pic.cc b/arch/pic.cc new file mode 100644 index 0000000..0bfb504 --- /dev/null +++ b/arch/pic.cc @@ -0,0 +1,63 @@ +#include "pic.h" + +#include "ioport.h" + +namespace PIC { + +void initialize() { + // Access primary & secondary PIC via two ports each + IOPort primary_port_a(0x20); + IOPort primary_port_b(0x21); + IOPort secondary_port_a(0xa0); + IOPort secondary_port_b(0xa1); + + // Initialization Command Word 1 (ICW1) + // Basic PIC configuration, starting initialization + enum InitializationCommandWord1 { + ICW4_NEEDED = 1 << 0, // use Initialization Command Word 4 + SINGLE_MODE = 1 << 1, // Single or multiple (cascade mode) 8259A + ADDRESS_INTERVAL_HALF = + 1 << 2, // 4 or 8 bit interval between the interrupt vector locations + LEVEL_TRIGGERED = 1 << 3, // Level or edge triggered + ALWAYS_1 = 1 << 4, + }; + const uint8_t icw1 = InitializationCommandWord1::ICW4_NEEDED | + InitializationCommandWord1::ALWAYS_1; + // ICW1 in port A (each) + primary_port_a.outb(icw1); + secondary_port_a.outb(icw1); + + // Initialization Command Word 2 (ICW2): + // Configure interrupt vector base offset in port B + primary_port_b.outb(0x20); // Primary: IRQ Offset 32 + secondary_port_b.outb(0x28); // Secondary: IRQ Offset 40 + + // Initialization Command Word 3 (ICW3): + // Configure pin on primary PIC connected to secondary PIC + const uint8_t pin = 2; // Secondary connected on primary 
pin 2 + primary_port_b.outb(1 << pin); // Pin as bit mask for primary + secondary_port_b.outb(pin); // Pin as value (ID) for secondary + + // Initialization Command Word 4 (ICW4) + // Operating mode of the PICs (here: 8086 mode with automatic EOI) + enum InitializationCommandWord4 { + MODE_8086 = 1 << 0, // 8086/8088 or 8085 mode + AUTO_EOI = 1 << 1, // Automatic or normal End-Of-Interrupt + BUFFER_PRIMARY = 1 << 2, // Primary or secondary buffering + BUFFERED_MODE = + 1 << 3, // Enable or disable buffering (for primary or secondary above) + SPECIAL_FULLY_NESTED = 1 << 4 // Special or non special fully nested + }; + const uint8_t icw4 = InitializationCommandWord4::MODE_8086 | + InitializationCommandWord4::AUTO_EOI; + // ICW4 in port B (each) + primary_port_b.outb(icw4); + secondary_port_b.outb(icw4); + + // Operation Control Word 1 (OCW1): + // Disable (mask) all hardware interrupts on both legacy PICs (we'll use APIC) + secondary_port_b.outb(0xff); + primary_port_b.outb(0xff); +} + +} // namespace PIC diff --git a/arch/pic.h b/arch/pic.h new file mode 100644 index 0000000..9baf514 --- /dev/null +++ b/arch/pic.h @@ -0,0 +1,18 @@ +/*! \file + * \brief Handle (disable) the old Programmable Interrupt Controller (PIC) + */ + +#pragma once +#include "../types.h" + +/*! \brief The Programmable Interrupt Controller (PIC aka 8259A) + */ +namespace PIC { + +/*! \brief Initialize the PICs (Programmable Interrupt Controller, 8259A), + * such that all 15 hardware interrupts are stored sequentially in the \ref IDT + * and the hardware interrupts are disabled (in favor of \ref APIC). + */ +void initialize(); + +} // namespace PIC diff --git a/arch/pit.cc b/arch/pit.cc new file mode 100644 index 0000000..29da750 --- /dev/null +++ b/arch/pit.cc @@ -0,0 +1,225 @@ +#include "pit.h" + +#include "core.h" +#include "ioport.h" + +namespace PIT { + +// we only use PIT channel 2 +const uint8_t CHANNEL = 2; +static IOPort data(0x40 + CHANNEL); + +/*! 
\brief Access mode + */ +enum AccessMode { + LATCH_COUNT_VALUE = 0, + LOW_BYTE_ONLY = 1, + HIGH_BYTE_ONLY = 2, + LOW_AND_HIGH_BYTE = 3 +}; + +/*! \brief Operating Mode + * + * \warning Channel 2 is not able to send interrupts, however, the status bit + * will be set + */ +enum OperatingMode { + INTERRUPT_ON_TERMINAL_COUNT = 0, + PROGRAMMABLE_ONE_SHOT = 1, + RATE_GENERATOR = 2, + SQUARE_WAVE_GENERATOR = 3, ///< useful for the PC speaker + SOFTWARE_TRIGGERED_STROBE = 4, + HARDWARE_TRIGGERED_STROBE = 5 +}; + +/*! \brief data format + */ +enum Format { + BINARY = 0, + BCD = 1 ///< Binary Coded Decimals +}; + +// Mode register (only writable) +static IOPort mode_register(0x43); +union Mode { + struct { + Format format : 1; + OperatingMode operating : 3; + AccessMode access : 2; + uint8_t channel : 2; + }; + uint8_t value; + + /*! \brief Constructor for mode, takes the numeric value */ + explicit Mode(uint8_t value) : value(value) {} + + /*! \brief Constructor for counting mode + * \param access Access mode to the 16-bit counter value + * \param operating Operating mode for the counter + * \param format Number format for the 16-bit counter values (binary or + * BCD) + */ + Mode(AccessMode access, OperatingMode operating, Format format) + : format(format), + operating(operating), + access(access), + channel(PIT::CHANNEL) {} + + /*! \brief (Default) constructor for reading the counter value + */ + Mode() : value(0) { this->channel = PIT::CHANNEL; } + + /*! \brief Write the value to the mode register + */ + void write() const { mode_register.outb(value); } +}; + +// The NMI Status and Control Register contains details about PIT counter 2 +static IOPort controlRegister(0x61); +union Control { + /*! \brief I/O-port bitmap for the NMI Status and Control Register + * \note Over time, the meaning of the bits stored at I/O port 0x61 changed; + * don't get the structure confused with old documentation on the IBM PC XT + * platform. 
+ * \see [Intel® I/O Controller Hub 7 (ICH7) + * Family](i-o-controller-hub-7-datasheet.pdf#page=415), page 415 + */ + struct { + //! If enabled, the interrupt state will be visible at status_timer_counter2 + uint8_t enable_timer_counter2 : 1; + uint8_t enable_speaker_data : 1; ///< If set, speaker output is equal to + ///< status_timer_counter2 + uint8_t enable_pci_serr : 1; ///< not important, do not modify + uint8_t enable_nmi_iochk : 1; ///< not important, do not modify + const uint8_t + refresh_cycle_toggle : 1; ///< not important, must be 0 on write + const uint8_t + status_timer_counter2 : 1; ///< will be set on timer expiration; must + ///< be 0 on write + const uint8_t + status_iochk_nmi_source : 1; ///< not important, must be 0 on write + const uint8_t + status_serr_nmi_source : 1; ///< not important, must be 0 on write + }; + uint8_t value; + + /*! \brief Constructor + * \param value Numeric value for the control register + */ + explicit Control(uint8_t value) : value(value) {} + + /*! \brief Default constructor + * Automatically reads the current contents from the control register. + */ + Control() : value(controlRegister.inb()) {} + + /*! \brief Write the current state to the control register. + */ + void write() const { controlRegister.outb(value); } +}; + +// The base frequency is, due to historic reasons, 1.193182 MHz. 
+const uint64_t BASE_FREQUENCY = 1193182ULL; + +bool set(uint16_t us) { + // Counter ticks for us + uint64_t counter = BASE_FREQUENCY * us / 1000000ULL; + + // As the hardware counter has a size of 16 bit, we want to check whether the + // calculated counter value is too large ( > 54.9ms ) + if (counter > 0xffff) { + return false; + } + + // Interrupt state should be readable in status register, but PC speaker + // should remain off + Control c; + c.enable_speaker_data = 0; + c.enable_timer_counter2 = 1; + c.write(); + + // Channel 2, 16-bit divisor, with mode 0 (interrupt) in binary format + Mode m(AccessMode::LOW_AND_HIGH_BYTE, + OperatingMode::INTERRUPT_ON_TERMINAL_COUNT, Format::BINARY); + m.write(); + + // Set the counter's start value + data.outb(counter & 0xff); // low + data.outb((counter >> 8) & 0xff); // high + + return true; +} + +uint16_t get(void) { + // Set mode to reading + Mode m; + m.write(); + + // Read low and high + uint16_t value = data.inb(); + value |= data.inb() << 8; + + return value; +} + +bool isActive(void) { + Control c; // reads the current value from the control register + return c.enable_timer_counter2 == 1 && c.status_timer_counter2 == 0; +} + +bool waitForTimeout(void) { + while (true) { + Control c; // reads the current value from the control register + if (c.enable_timer_counter2 == 0) { + return false; + } else if (c.status_timer_counter2 == 1) { + return true; + } else { + Core::pause(); + } + } +} + +bool delay(uint16_t us) { return set(us) && waitForTimeout(); } + +void pcspeaker(uint32_t freq) { + Control c; + if (freq == 0) { + disable(); + } else { + // calculate frequency divider + uint64_t div = BASE_FREQUENCY / freq; + if (div > 0xffff) { + div = 0xffff; + } + + // check if already configured + if (c.enable_speaker_data == 0) { + // if not, set mode + Mode m(AccessMode::LOW_AND_HIGH_BYTE, + OperatingMode::SQUARE_WAVE_GENERATOR, Format::BINARY); + m.write(); + } + + // write frequency divider + data.outb(div & 0xff); + 
data.outb((div >> 8) & 0xff); + + // already configured? (second part to prevent playing a wrong sound) + if (c.enable_speaker_data == 0) { + // activate PC speaker + c.enable_speaker_data = 1; + c.enable_timer_counter2 = 1; + c.write(); + } + } +} + +void disable(void) { + Control c; + c.enable_speaker_data = 0; + c.enable_timer_counter2 = 0; + c.write(); +} + +} // namespace PIT diff --git a/arch/pit.h b/arch/pit.h new file mode 100644 index 0000000..0f22f35 --- /dev/null +++ b/arch/pit.h @@ -0,0 +1,80 @@ +/*! \file + * \brief The old/historical \ref PIT "Programmable Interval Timer (PIT)" + */ + +#pragma once +#include "../types.h" + +/*! \brief Abstraction of the historical Programmable Interval Timer (PIT). + * + * Historically, PCs had a Timer component of type 8253 or 8254, modern systems + * come with a compatible chip. Each of these chips provides three 16-bit wide + * counters ("channel"), each running at a frequency of 1.19318 MHz. The timer's + * counting speed is thereby independent from the CPU frequency. + * + * Traditionally, the first counter (channel 0) was used for triggering + * interrupts, the second one (channel 1) controlled the memory refresh, and the + * third counter (channel 2) was assigned to the PC speaker. + * + * As the PIT's frequency is fixed to a constant value of 1.19318 MHz, the PIT + * can be used for calibration. For this purpose, we use channel 2 only. + * + * \note Interrupts should be disabled while configuring the timer. + */ +namespace PIT { + +/*! \brief Start timer + * + * Sets the channel 2 timer to the provided value and starts counting. + * + * \note The maximum waiting time is approx. 54,900 us (16 bit / 1.193 MHz). + * \param us Waiting time in us + * \return `true` if the counter is running; `false` if the waiting time + * exceeds the limits. + */ +bool set(uint16_t us); + +/*! \brief Reads the current timer value + * \return Current timer value + */ +uint16_t get(void); + +/*! 
\brief Check if the timer is running + * \return `true` if running, `false` otherwise + */ +bool isActive(void); + +/*! \brief (Active) waiting for timeout + * \return `true` when timeout was successfully hit, `false` if the timer was + * not active prior to calling. + */ +bool waitForTimeout(void); + +/*! \brief Set the timer and wait for timeout + * \note The maximum waiting time is approx. 54,900 us (16 bit / 1.193 MHz). + * \param us Waiting time in us + * \return `true` when waiting successfully terminated; `false` on error (e.g., + * waiting time exceeds its limits) + */ +bool delay(uint16_t us); + +/*! \brief Play a given frequency on the PC speaker. + * + * As the PC speaker is connected to PIT channel 2, the PIT can be used to play + * an acoustic signal. Playing sounds occupies the PIT, so it cannot be used for + * other purposes while playback. + * + * \note Not every PC has an activated PC speaker + * \note Qemu & KVM have to be launched with `-soundhw pcspk`. + * If you still cannot hear anything, try to set `QEMU_AUDIO_DRV` to + * `alsa` (by launching \StuBS with `QEMU_AUDIO_DRV=alsa make kvm`) + * \param freq Frequency (in Hz) of the sound to be played, or 0 to deactivate + * playback. + */ +void pcspeaker(uint32_t freq); + +/*! 
\brief Deactivate the timer + */ +void disable(void); + +} // namespace PIT diff --git a/arch/serial.cc b/arch/serial.cc new file mode 100644 index 0000000..0f79b16 --- /dev/null +++ b/arch/serial.cc @@ -0,0 +1,41 @@ +#include "serial.h" + +Serial::Serial(ComPort port, BaudRate baud_rate, DataBits data_bits, + StopBits stop_bits, Parity parity) + : port(port) { + // initialize FIFO mode, no irqs for sending, irq if first byte was received + + // line control, select r/w of divisor latch register + writeReg(LINE_CONTROL_REGISTER, DIVISOR_LATCH_ACCESS_BIT); + + // TODO: Implement here the correct handling of input arguments + (void)baud_rate; + (void)data_bits; + (void)stop_bits; + (void)parity; + + // FIFO: Enable & clear buffers + writeReg(FIFO_CONTROL_REGISTER, + ENABLE_FIFO | CLEAR_RECEIVE_FIFO | CLEAR_TRANSMIT_FIFO); + + // Modem Control: OUT2 (0000 1000) must be set for interrupt + writeReg(MODEM_CONTROL_REGISTER, OUT_2); +} + +void Serial::writeReg(RegisterIndex reg, char out) { + // TODO: Implement + (void)reg; + (void)out; +} + +char Serial::readReg(RegisterIndex reg) { + // TODO: Implement + (void)reg; + return '\0'; +} + +int Serial::write(char out) { + // TODO: Implement + (void)out; + return 0; +} diff --git a/arch/serial.h b/arch/serial.h new file mode 100644 index 0000000..d2b471b --- /dev/null +++ b/arch/serial.h @@ -0,0 +1,205 @@ +/*! \file + * \brief Communication via the \ref Serial interface (RS-232) + */ + +#pragma once +#include "../types.h" + +/*! \brief Serial interface. + * \ingroup io + * + * This class provides a serial interface (COM1 - COM4) for communication with + * the outside world. + * + * The first IBM PC used the external chip [8250 + * UART](https://de.wikipedia.org/wiki/NSC_8250), whereas, in today's systems, + * this functionality is commonly integrated into the motherboard chipset, but + * remained compatible. 
+ * + * \see [PC8250A Data Sheet](uart-8250a.pdf#page=11) (Registers on page 11) + * \see [PC16550D Data Sheet](uart-16550d.pdf#page=16) (Successor, for optional + * FIFO buffer, page 16) + */ + +class Serial { + public: + /*! \brief COM-Port + * + * The serial interface and its hardware addresses. Modern desktop PCs have, + * at most, a single, physical COM-port (`COM1`) + */ + enum ComPort { + COM1 = 0x3f8, + COM2 = 0x2f8, + COM3 = 0x3e8, + COM4 = 0x2e8, + }; + + /*! \brief Transmission speed + * + * The unit Baud describes the transmission speed in number of symbols per + * seconds. 1 Baud therefore equals the transmission of 1 symbol per second. + * The possible Baud rates are whole-number dividers of the clock frequency + * of 115200 Hz.. + */ + enum BaudRate { + BAUD_300 = 384, + BAUD_600 = 192, + BAUD_1200 = 96, + BAUD_2400 = 48, + BAUD_4800 = 24, + BAUD_9600 = 12, + BAUD_19200 = 6, + BAUD_38400 = 3, + BAUD_57600 = 2, + BAUD_115200 = 1, + }; + + /*! \brief Number of data bits per character */ + enum DataBits : uint8_t { + DATA_5BIT = 0, + DATA_6BIT = 1, + DATA_7BIT = 2, + DATA_8BIT = 3, + }; + + /*! \brief Number of stop bits per character */ + enum StopBits : uint8_t { + STOP_1BIT = 0, + STOP_1_5BIT = 4, + STOP_2BIT = 4, + }; + + /*! \brief parity bit */ + enum Parity : uint8_t { + PARITY_NONE = 0, + PARITY_ODD = 8, + PARITY_EVEN = 24, + PARITY_MARK = 40, + PARITY_SPACE = 56, + }; + + private: + /*! \brief register index */ + enum RegisterIndex { + // if Divisor Latch Access Bit [DLAB] = 0 + RECEIVE_BUFFER_REGISTER = 0, ///< read only + TRANSMIT_BUFFER_REGISTER = 0, ///< write only + INTERRUPT_ENABLE_REGISTER = 1, + + // if Divisor Latch Access Bit [DLAB] = 1 + DIVISOR_LOW_REGISTER = 0, + DIVISOR_HIGH_REGISTER = 1, + + // (irrespective from DLAB) + INTERRUPT_IDENT_REGISTER = 2, ///< read only + FIFO_CONTROL_REGISTER = + 2, ///< write only -- 16550 and newer (esp. 
not 8250a) + LINE_CONTROL_REGISTER = 3, ///< highest-order bit is DLAB (see above) + MODEM_CONTROL_REGISTER = 4, + LINE_STATUS_REGISTER = 5, + MODEM_STATUS_REGISTER = 6 + }; + + /*! \brief Mask for the respective register */ + enum RegisterMask : uint8_t { + // Interrupt Enable Register + RECEIVED_DATA_AVAILABLE = 1 << 0, + TRANSMITTER_HOLDING_REGISTER_EMPTY = 1 << 1, + RECEIVER_LINE_STATUS = 1 << 2, + MODEM_STATUS = 1 << 3, + + // Interrupt Ident Register + INTERRUPT_PENDING = 1 << 0, ///< 0 means interrupt pending + INTERRUPT_ID_0 = 1 << 1, + INTERRUPT_ID_1 = 1 << 2, + + // FIFO Control Register + ENABLE_FIFO = 1 << 0, ///< 0 means disabled ^= conforming to 8250a + CLEAR_RECEIVE_FIFO = 1 << 1, + CLEAR_TRANSMIT_FIFO = 1 << 2, + DMA_MODE_SELECT = 1 << 3, + TRIGGER_RECEIVE = 1 << 6, + + // Line Control Register + // bits per character: 5 6 7 8 + WORD_LENGTH_SELECT_0 = 1 << 0, // Setting Select0: 0 1 0 1 + WORD_LENGTH_SELECT_1 = 1 << 1, // Setting Select1: 0 0 1 1 + NUMBER_OF_STOP_BITS = 1 << 2, // 0 ≙ one stop bit, 1 ≙ 1.5/2 stop bits + PARITY_ENABLE = 1 << 3, + EVEN_PARITY_SELECT = 1 << 4, + STICK_PARITY = 1 << 5, + SET_BREAK = 1 << 6, + DIVISOR_LATCH_ACCESS_BIT = 1 << 7, // DLAB + + // Modem Control Register + DATA_TERMINAL_READY = 1 << 0, + REQUEST_TO_SEND = 1 << 1, + OUT_1 = 1 << 2, + OUT_2 = 1 << 3, // must be set for interrupts! + LOOP = 1 << 4, + + // Line Status Register + DATA_READY = 1 << 0, // Set when there is a value in the receive buffer + OVERRUN_ERROR = 1 << 1, + PARITY_ERROR = 1 << 2, + FRAMING_ERROR = 1 << 3, + BREAK_INTERRUPT = 1 << 4, + TRANSMITTER_HOLDING_REGISTER = 1 << 5, + TRANSMITTER_EMPTY = 1 << 6, // Send buffer empty (ready to send) + + // Modem Status Register + DELTA_CLEAR_TO_SEND = 1 << 0, + DELTA_DATA_SET_READY = 1 << 1, + TRAILING_EDGE_RING_INDICATOR = 1 << 2, + DELTA_DATA_CARRIER_DETECT = 1 << 3, + CLEAR_TO_SEND = 1 << 4, + DATA_SET_READY = 1 << 5, + RING_INDICATOR = 1 << 6, + DATA_CARRIER_DETECT = 1 << 7 + }; + + /*! 
\brief Read value from register + * + * \todo(11) Implement Method + * + * \param reg Register index + * \return The value read from register + */ + char readReg(RegisterIndex reg); + + /*! \brief Write value to register + * + * \todo(11) Implement Method + * + * \param reg Register index + * \param out value to be written + */ + void writeReg(RegisterIndex reg, char out); + + protected: + /*! \brief Selected COM port */ + const ComPort port; + + public: + /*! \brief Constructor + * + * Creates a Serial object that encapsulates the used COM port, as well as the + * parameters used for the serial connection. Default values are `8N1` (8 bit, + * no parity bit, one stop bit) with 115200 Baud using COM1. + * + * \todo(11) - Implement Constructor + */ + explicit Serial(ComPort port = COM1, BaudRate baud_rate = BAUD_115200, + DataBits data_bits = DATA_8BIT, + StopBits stop_bits = STOP_1BIT, Parity parity = PARITY_NONE); + + /*! \brief Write one byte to the serial interface + * + * \todo(11) - Implement Method + * + * \param out Byte to be written + * \return Byte written (or `-1` if writing byte failed) + */ + int write(char out); +}; diff --git a/arch/system.cc b/arch/system.cc new file mode 100644 index 0000000..7ac837e --- /dev/null +++ b/arch/system.cc @@ -0,0 +1,16 @@ +#include "system.h" + +#include "../debug/output.h" +#include "cmos.h" +#include "ioport.h" + +namespace System { + +void reboot() { + const IOPort system_control_port_a(0x92); + DBG_VERBOSE << "rebooting smp" << endl; + CMOS::write(CMOS::REG_STATUS_SHUTDOWN, 0); + system_control_port_a.outb(0x3); +} + +} // namespace System diff --git a/arch/system.h b/arch/system.h new file mode 100644 index 0000000..47c6191 --- /dev/null +++ b/arch/system.h @@ -0,0 +1,15 @@ +/*! \file + * \brief General \ref System functionality (\ref System::reboot "reboot") + */ + +#pragma once +#include "../types.h" + +/*! \brief General System functions + */ +namespace System { + +/*! 
\brief Perform a reboot + */ +void reboot(); +} // namespace System diff --git a/arch/textwindow.cc b/arch/textwindow.cc new file mode 100644 index 0000000..9505213 --- /dev/null +++ b/arch/textwindow.cc @@ -0,0 +1,41 @@ +#include "textwindow.h" + +TextWindow::TextWindow(unsigned from_col, unsigned to_col, unsigned from_row, + unsigned to_row, bool use_cursor) { + (void)from_col; + (void)to_col; + (void)from_row; + (void)to_row; + (void)use_cursor; +} + +void TextWindow::setPos(unsigned rel_x, unsigned rel_y) { + (void)rel_x; + (void)rel_y; +} + +void TextWindow::getPos(unsigned& rel_x, unsigned& rel_y) const { + (void)rel_x; + (void)rel_y; +} + +void TextWindow::setPos(int rel_x, int rel_y) { + (void)rel_x; + (void)rel_y; +} + +void TextWindow::getPos(int& rel_x, int& rel_y) const { + (void)rel_x; + (void)rel_y; +} + +void TextWindow::print(const char* str, size_t length, CGA::Attribute attrib) { + (void)str; + (void)length; + (void)attrib; +} + +void TextWindow::reset(char character, CGA::Attribute attrib) { + (void)character; + (void)attrib; +} diff --git a/arch/textwindow.h b/arch/textwindow.h new file mode 100644 index 0000000..73588ab --- /dev/null +++ b/arch/textwindow.h @@ -0,0 +1,134 @@ +/*! \file + * \brief \ref TextWindow provides virtual output windows in text mode + */ + +#pragma once +#include "../types.h" +#include "cga.h" + +/*! \brief Virtual windows in text mode + * \ingroup io + * + * Outputs text on a part of the screen in \ref CGA, + * a window is defined in by position and size (with its own cursor). + * + * This allows to separate the output of the application from the debug output + * on the screen without having to synchronize. + */ +class TextWindow { + // Prevent copies and assignments + TextWindow(const TextWindow&) = delete; + TextWindow& operator=(const TextWindow&) = delete; + + public: + /*! \brief Constructor of a text window + * + * Creates a virtual, rectangular text window on the screen. 
+ * The coordinates to construct the window are absolute positions in the + * \ref CGA screen. + * + * \note Overlapping windows are neither supported nor prevented -- better + * just try to avoid construction windows with overlapping coordinates! + * + * \warning Don't use the hardware cursor in more than one window! + * + * \param from_col Text Window starts in column `from_col`, + * the first (leftmost) possible column is `0` + * \param to_col Text Window extends to the right to column `to_col` + * (exclusive). This column has to be strictly greater than `from_col`, the + * maximum allowed value is \ref CGA::COLUMNS (rightmost) + * \param from_row Text Window starts in row `from_row`, + * the first possible (uppermost) row is `0` + * \param to_row Text Window extends down to row `to_row` (exclusive). + * This row has to be strictly greater than `from_row`, + * the maximum allowed value is \ref CGA::ROWS (bottom-most) + * \param use_cursor Specifies whether the hardware cursor (`true`) or a + * software cursor/variable (`false`) should be used to + * store the current position + * + * \todo(11) Implement constructor + */ + TextWindow(unsigned from_col, unsigned to_col, unsigned from_row, + unsigned to_row, bool use_cursor = false); + + /*! \brief Set the cursor position in the window + * + * Depending on the constructor parameter `use_cursor` either the + * hardware cursor (and only the hardware cursor!) is used or the position + * is stored internally in the object. + * + * The coordinates are relative to the upper left starting position of + * the window. + * + * \param rel_x Column in window + * \param rel_y Row in window + * \todo(11) Implement method, use \ref CGA::setCursor() for the hardware + * cursor + */ + void setPos(unsigned rel_x, unsigned rel_y); + + /*! \brief Set the cursor position in the window + * + * Depending on the constructor parameter `use_cursor` either the + * hardware cursor (and only the hardware cursor!) 
is used or the position + * is stored internally in the object. + * + * The coordinates are relative to the upper left starting position of + * the window. + * Negative coordinates are interpreted relative to the right and bottom + * border of the window. + * + * \todo(11) Implement this method (it can either use or replace + * \ref setPos(unsigned, unsigned)) + */ + void setPos(int rel_x, int rel_y); + + /*! \brief Get the current cursor position in the window + * + * Depending on the constructor parameter `use_cursor` either the + * hardware cursor (and only the hardware cursor!) is used or the position + * is retrieved from the internally stored object. + * + * \param rel_x Column in window + * \param rel_y Row in window + * \todo(11) Implement Method, use \ref CGA::getCursor() for the hardware + * cursor + */ + void getPos(unsigned& rel_x, unsigned& rel_y) const; + + /// \copydoc TextWindow::getPos(unsigned&,unsigned&) const + void getPos(int& rel_x, int& rel_y) const; + + /*! \brief Display multiple characters in the window + * + * Output a character string, starting at the current cursor position. + * Since the string does not need to contain a `\0` termination (unlike the + * common C string), a length parameter is required to specify the number + * of characters in the string. + * When the output is complete, the cursor is positioned after the last + * printed character. + * The same attributes (colors) are used for the entire text. + * + * If there is not enough space left at the end of the line, + * the output continues on the following line. + * As soon as the last window line is filled, the entire window area is + * moved up one line: The first line disappears, the bottom line is cleared. + * + * A line break also occurs whenever the character `\n` appears in the text. 
+ * + * \param string Text to be printed + * \param length Length of text + * \param attrib Attribute for text + * \todo(11) Implement Method + */ + void print(const char* string, size_t length, + CGA::Attribute attrib = CGA::Attribute()); // NOLINT + + /*! \brief Delete all contents in the window and reset the cursor. + * + * \param character Fill character + * \param attrib Attribute for fill character + * \todo(11) Implement Method + */ + void reset(char character = ' ', CGA::Attribute attrib = CGA::Attribute()); +}; diff --git a/boot/longmode.asm b/boot/longmode.asm new file mode 100644 index 0000000..0c932d0 --- /dev/null +++ b/boot/longmode.asm @@ -0,0 +1,245 @@ +; The stony path to Long Mode (64-bit)... +; ... begins in 32-bit Protected Mode +[BITS 32] + +; Pointer to Long Mode Global Descriptor Table (GDT, arch/gdt.cc) +[EXTERN gdt_long_mode_pointer] + +[GLOBAL long_mode] +long_mode: + +; You can check if the CPU supports Long Mode by using the `cpuid` command. +; Problem: You first have to figure out if the `cpuid` command itself is +; supported. Therefore, you have to try to reverse the 21st bit in the EFLAGS +; register -- if it works, then there is the 'cpuid' instruction. +CPUID_BIT_MASK equ 1 << 21 + +check_cpuid: + ; Save EFLAGS on stack + pushfd + + ; Copy stored EFLAGS from stack to EAX register + mov eax, [esp] + + ; Flip the 21st bit (ID) in EAX + xor eax, CPUID_BIT_MASK + + ; Copy EAX to EFLAGS (using the stack) + push eax + popfd + + ; And reverse: copy EFLAGS to EAX (using the stack) + ; (but the 21st bit should now still be flipped, if `cpuid` is supported) + pushfd + pop eax + + ; Compare the new EFLAGS copy (residing in EAX) with the EFLAGS stored at + ; the beginning of this function by using an exclusive OR -- all different + ; (flipped) bits will be stored in EAX. 
+ xor eax, [esp] + + ; Restore original EFLAGS + popfd + + ; If 21st Bit in EAX is set, `cpuid` is supported -- continue at check_long_mode + and eax, CPUID_BIT_MASK + jnz check_long_mode + + ; Show error message "No CPUID" and stop CPU + mov dword [0xb8000], 0xcf6fcf4e + mov dword [0xb8004], 0xcf43cf20 + mov dword [0xb8008], 0xcf55cf50 + mov dword [0xb800c], 0xcf44cf49 + hlt + +; Now you are able to use the `cpuid` instruction to check if Long Mode is +; available -- after you've checked if the `cpuid` is able to perform the +; check itself (since it is an extended `cpuid` function)... + +CPUID_GET_LARGEST_EXTENDED_FUNCTION_NUMBER equ 0x80000000 +CPUID_GET_EXTENDED_PROCESSOR_FEATURES equ 0x80000001 +CPUID_HAS_LONGMODE equ 1 << 29 + +check_long_mode: + ; Set argument for `cpuid` to check the availability of extended functions + ; and call cpuid + mov eax, CPUID_GET_LARGEST_EXTENDED_FUNCTION_NUMBER + cpuid + ; The return value contains the maximum function number supported by `cpuid`, + ; You'll need the function number for extended processor features + cmp eax, CPUID_GET_EXTENDED_PROCESSOR_FEATURES + ; If not present, the CPU is definitely too old to support long mode + jb no_long_mode + + ; Finally, you are able to check the Long Mode support itself + mov eax, CPUID_GET_EXTENDED_PROCESSOR_FEATURES + cpuid + ; If the return value in the EDX register has set the 29th bit, + ; then long mode is supported -- continue with setup_paging + test edx, CPUID_HAS_LONGMODE + jnz setup_paging + +no_long_mode: + ; Show error message "No 64bit" and stop CPU + mov dword [0xb8000], 0xcf6fcf4e + mov dword [0xb8004], 0xcf36cf20 + mov dword [0xb8008], 0xcf62cf34 + mov dword [0xb800c], 0xcf74cf69 + hlt + +; Paging is required for Long Mode. +; Since an extensive page manager might be a bit of an overkill to start with, +; the following code creates an identity mapping for the first four gigabytes +; (using huge pages): each virtual address will point to the same physical one. 
+; This area (up to 4 GiB) is important for some memory mapped devices (APIC) +; and you don't want to remap them yet for simplicity reasons. +; In the advanced operating systems lecture, this topic is covered in detail, +; however, if you want a quick overview, have a look at +; https://wiki.osdev.org/Page_Tables#2_MiB_pages_2 + +PAGE_SIZE equ 4096 +PAGE_FLAGS_PRESENT equ 1 << 0 +PAGE_FLAGS_WRITEABLE equ 1 << 1 +PAGE_FLAGS_USER equ 1 << 2 +PAGE_FLAGS_HUGE equ 1 << 7 + +setup_paging: + ; Unlike in Protected Mode, an entry in the page table has a size of 8 bytes + ; (vs 4 bytes), so there are only 512 (and not 1024) entries per table. + ; Structure of the 3-level PAE paging: One entry in the + ; - lv2: Page-Directory-Table (PDT) covers 2 MiB (1 Huge Page) + ; - lv3: Page-Directory-Pointer-Table (PDPT) covers 1 GiB (512 * 2 MiB) + ; - lv4: Page-Map-Level-4-Table (PML4) covers 512 GiB (512 * 1 GiB) + + ; To address 4 GiB only four level-2 tables are required. + ; All entries of the level-2 tables should be marked as writeable (attributes) + ; and map (point to) the corresponding physical memory. + + ; This is done in a loop using ECX as counter + mov ecx, 0 + +.identitymap_level2: + ; Calculate physical address in EAX (2 MiB multiplied by the counter) + mov eax, 0x200000 + mul ecx + ; Configure page attributes + or eax, PAGE_FLAGS_PRESENT | PAGE_FLAGS_WRITEABLE | PAGE_FLAGS_HUGE | PAGE_FLAGS_USER + ; Write (8 byte) entry in the level-2 table + mov [paging_level2_tables + ecx * 8], eax + + ; Increment counter... + inc ecx + ; ... until all four level-2 tables are filled + cmp ecx, 512 * 4 + jne .identitymap_level2 + + ; The first four entries of the level-3 table should point to the + ; four level-2 tables (and be writeable as well). 
+	; Again, ECX acts as counter for the loop
+	mov ecx, 0
+
+.identitymap_level3:
+	; Calculate the address: ECX * PAGE_SIZE + paging_level2_tables
+	mov eax, ecx
+	; The size of a page is stored in the EDX register
+	; (`mul` takes no immediate operand; it overwrites EDX:EAX with the product)
+	mov edx, PAGE_SIZE
+	mul edx
+	add eax, paging_level2_tables
+	; Configure attributes
+	or eax, PAGE_FLAGS_PRESENT | PAGE_FLAGS_WRITEABLE | PAGE_FLAGS_USER
+	; Write (8 byte) entry in the level-3 table
+	; (only the lower 4 bytes are stored here)
+	mov [paging_level3_table + ecx * 8], eax
+
+	; Increment counter...
+	inc ecx
+	; ... until all four entries of the table are written
+	cmp ecx, 4
+	jne .identitymap_level3
+
+	; Point the first level-3 entry to the first level-2 table.
+	; NOTE(review): this stores the same value the loop above already wrote
+	; for ECX = 0, so it looks redundant -- confirm before removing.
+	mov eax, paging_level2_tables
+	or eax, PAGE_FLAGS_PRESENT | PAGE_FLAGS_WRITEABLE | PAGE_FLAGS_USER
+	mov [paging_level3_table], eax
+
+	; The first entry of the level-4 table should point to the level-3 table
+	mov eax, paging_level3_table
+	or eax, PAGE_FLAGS_PRESENT | PAGE_FLAGS_WRITEABLE | PAGE_FLAGS_USER
+	mov [paging_level4_table], eax
+
+; Time to activate paging
+paging_enable:
+	; First set up the control registers
+
+	; Write the address of the level-4 table into the CR3 register
+	mov eax, paging_level4_table
+	mov cr3, eax
+
+	; Activate Physical Address Extension (PAE)
+	; by setting bit 5 in the CR4 register
+	mov eax, cr4
+	or eax, 1 << 5
+	mov cr4, eax
+
+	; Set the Long Mode Enable bit (bit 8) in the EFER MSR
+	; (Extended Feature Enable Register Model Specific Register)
+	; ECX selects the MSR for `rdmsr` / `wrmsr`
+	mov ecx, 0xC0000080
+	rdmsr
+	or eax, 1 << 8
+	wrmsr
+
+	; Finally, bit 31 in CR0 is set to enable Paging
+	mov eax, cr0
+	or eax, 1 << 31
+	mov cr0, eax
+
+	; Load Long Mode Global Descriptor Table
+	lgdt [gdt_long_mode_pointer]
+
+	; Far jump to the 64-bit start code
+	jmp 0x8:long_mode_start
+
+	; Print `KO` to screen and stop
+	; (follows an unconditional jump, so it is not reached in normal operation)
+	mov dword [0xb8000], 0x3f4f3f4b
+	hlt
+
+; Memory reserved for page tables
+[SECTION .bss]
+
+; The tables below start at a 4096 byte (PAGE_SIZE) boundary
+align 4096
+
+[GLOBAL paging_level4_table]
+[GLOBAL paging_level3_table]
+[GLOBAL paging_level2_tables]
+; 1x Level-4 Table (Page Map Level 4)
+paging_level4_table:
+	resb
PAGE_SIZE + +; 1x Level-3 Table (Page Directory Pointer Table) +paging_level3_table: + resb PAGE_SIZE + +; 4x Level-2 Table (Page Directory) +paging_level2_tables: + resb PAGE_SIZE * 4 + +[SECTION .text] +[EXTERN kernel_init] ; C++ entry function + +; Continue with 64 bit code +[BITS 64] + +long_mode_start: + ; Zero all segment register + mov ax, 0x0 + mov ss, ax + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + + ; Call high-level (C++) kernel initialization function + call kernel_init + + ; Print `STOP` to screen and stop + mov rax, 0x2f502f4f2f544f53 + mov qword [0xb8000], rax + hlt diff --git a/boot/multiboot/config.inc b/boot/multiboot/config.inc new file mode 100644 index 0000000..1e764f3 --- /dev/null +++ b/boot/multiboot/config.inc @@ -0,0 +1,22 @@ +; Magic Header, has to be present in Kernel to indicate Multiboot compliance +MULTIBOOT_HEADER_MAGIC_OS equ 0x1badb002 + +; Answer by the boot loader for Multiboot compliance, written in eax register +MULTIBOOT_HEADER_MAGIC_LOADER equ 0x2badb002 + +; Flags instructing the Multiboot compliant boot loader to setup the system +; according to your needs +MULTIBOOT_PAGE_ALIGN equ 1<<0 ; Align boot modules (initrds) at 4 KiB border +MULTIBOOT_MEMORY_INFO equ 1<<1 ; Request Memory Map information +MULTIBOOT_VIDEO_MODE equ 1<<2 ; Configure video mode + +MULTIBOOT_HEADER_FLAGS equ 0 + +; Desired video mode (only considered if MULTIBOOT_VIDEO_MODE set) +; (boot loader will choose the best fitting mode, which might differ from the settings below) +MULTIBOOT_VIDEO_WIDTH equ 1280 ; Desired width +MULTIBOOT_VIDEO_HEIGHT equ 1024 ; Desired height +MULTIBOOT_VIDEO_BITDEPTH equ 32 ; Desired bit depth + +; Checksum +MULTIBOOT_HEADER_CHKSUM equ -(MULTIBOOT_HEADER_MAGIC_OS + MULTIBOOT_HEADER_FLAGS) diff --git a/boot/multiboot/data.cc b/boot/multiboot/data.cc new file mode 100644 index 0000000..0f0f704 --- /dev/null +++ b/boot/multiboot/data.cc @@ -0,0 +1,167 @@ +#include "boot/multiboot/data.h" + +/*! 
\brief Multiboot Information Structure according to Specification + * \see [Multiboot Specification]{@ref multiboot} + */ +struct multiboot_info { + /*! \brief Helper Structure + */ + struct Array { + uint32_t size; ///< Length + uint32_t addr; ///< Begin (physical address) + } __attribute__((packed)); + + enum Flag : uint32_t { + Memory = 1 << 0, ///< is there basic lower/upper memory information? + BootDev = 1 << 1, ///< is there a boot device set? + CmdLine = 1 << 2, ///< is the command-line defined? + Modules = 1 << 3, ///< are there modules to do something with? + /* These next two are mutually exclusive */ + SymbolTable = 1 << 4, ///< is there an a.out symbol table loaded? + SectionHeader = 1 << 5, ///< is there an ELF section header table? + + MemoryMap = 1 << 6, ///< is there a full memory map? + DriveInfo = 1 << 7, ///< Is there drive info? + ConfigTable = 1 << 8, ///< Is there a config table? + BootLoaderName = 1 << 9, ///< Is there a boot loader name? + ApmTable = 1 << 10, ///< Is there a APM table? + + // Is there video information? + VbeInfo = 1 << 11, ///< Vesa bios extension + FramebufferInfo = 1 << 12 ///< Framebuffer + } flags; + + /*! \brief Available memory retrieved from BIOS + */ + struct { + uint32_t lower; ///< Amount of memory below 1 MiB in kilobytes + uint32_t upper; ///< Amount of memory above 1 MiB in kilobytes + } mem __attribute__((packed)); + uint32_t boot_device; ///< "root" partition + uint32_t cmdline; ///< Kernel command line + Array mods; ///< List of boot modules + union { + /*! \brief Symbol table for kernel in a.out format + */ + struct { + uint32_t tabsize; + uint32_t strsize; + uint32_t addr; + uint32_t reserved; + } aout_symbol_table __attribute__((packed)); + + /*! 
\brief Section header table for kernel in ELF + */ + struct { + uint32_t num; ///< Number of entries + uint32_t size; ///< Size per entry + uint32_t addr; ///< Start of the header table + uint32_t shndx; ///< String table index + } elf_section_header_table __attribute__((packed)); + }; + + struct Array mmap; ///< Memory Map + struct Array drives; ///< Drive Information + uint32_t config_table; ///< ROM configuration table + uint32_t boot_loader_name; ///< Boot Loader Name + uint32_t apm_table; ///< APM table + + struct Multiboot::VBE vbe; ///< VBE Information + struct Multiboot::Framebuffer framebuffer; ///< Framebuffer information + + /*! \brief Check if setting is available + * \param flag Flag to check + * \return `true` if available + */ + bool has(enum Flag flag) const { return (flags & flag) != 0; } +} __attribute__((packed)); +assert_size(multiboot_info, 116); + +/*! \brief The pointer to the multiboot structures will be assigned in the + * assembler startup code (multiboot.inc) + */ +struct multiboot_info *multiboot_addr = 0; + +namespace Multiboot { +Module *getModule(unsigned i) { + if (multiboot_addr != nullptr && + multiboot_addr->has(multiboot_info::Flag::Modules) && + i < multiboot_addr->mods.size) { + return i + reinterpret_cast( + static_cast(multiboot_addr->mods.addr)); + } else { + return nullptr; + } +} + +unsigned getModuleCount() { return multiboot_addr->mods.size; } + +void *Memory::getStartAddress() const { + if (sizeof(void *) == 4 && (addr >> 32) != 0) { + return reinterpret_cast(addr & 0xffffffff); + } else { + return reinterpret_cast(static_cast(addr)); + } +} + +void *Memory::getEndAddress() const { + uint64_t end = addr + len; + if (sizeof(void *) == 4 && (end >> 32) != 0) { + return reinterpret_cast(addr & 0xffffffff); + } else { + return reinterpret_cast(static_cast(end)); + } +} + +bool Memory::isAvailable() const { return type == AVAILABLE; } + +Memory *Memory::getNext() const { + if (multiboot_addr != nullptr && + 
multiboot_addr->has(multiboot_info::Flag::MemoryMap)) { + uintptr_t next = reinterpret_cast(this) + size + sizeof(size); + if (next < multiboot_addr->mmap.addr + multiboot_addr->mmap.size) { + return reinterpret_cast(next); + } + } + return nullptr; +} + +Memory *getMemoryMap() { + if (multiboot_addr != nullptr && + multiboot_addr->has(multiboot_info::Flag::MemoryMap) && + multiboot_addr->mmap.size > 0) { + return reinterpret_cast( + static_cast(multiboot_addr->mmap.addr)); + } else { + return nullptr; + } +} + +char *getCommandLine() { + return reinterpret_cast( + static_cast(multiboot_addr->cmdline)); +} + +char *getBootLoader() { + return reinterpret_cast( + static_cast(multiboot_addr->boot_loader_name)); +} + +VBE *getVesaBiosExtensionInfo() { + if (multiboot_addr != nullptr && + multiboot_addr->has(multiboot_info::Flag::VbeInfo)) { + return &(multiboot_addr->vbe); + } else { + return nullptr; + } +} + +Framebuffer *getFramebufferInfo() { + if (multiboot_addr != nullptr && + multiboot_addr->has(multiboot_info::Flag::FramebufferInfo)) { + return &(multiboot_addr->framebuffer); + } else { + return nullptr; + } +} +} // namespace Multiboot diff --git a/boot/multiboot/data.h b/boot/multiboot/data.h new file mode 100644 index 0000000..0ce88dc --- /dev/null +++ b/boot/multiboot/data.h @@ -0,0 +1,230 @@ +/*! \file + * \brief \ref Multiboot Interface + */ +#pragma once +#include "../../compiler/fix.h" +#include "../../debug/assert.h" +#include "../../types.h" + +/*! \brief Interface for Multiboot + * + * Due to historical reasons, a normal BIOS allows you to do quite an egg dance + * until you finally reach the actual kernel (especially with only 512 bytes + * available in the master boot record...). 
+ * Fortunately, there are [boot loaders](https://wiki.osdev.org/Bootloader) that + * (partly) do this ungrateful job for you: + * They load your kernel into memory, switch (the bootstrap processor) to + * protected mode (32 bit) and jump to the entry point of our kernel -- saving + * you a lot of boring (or enlightening?) work: reading ancient systems + * documentation. One of the most famous representatives is the [Grand Unified + * Bootloader (GRUB)](https://www.gnu.org/software/grub/), which is also the + * reference implementation of the [Multiboot Specification]{@ref multiboot}. + * + * A Multiboot compliant boot loader will prepare the system according to your + * needs and can hand you a lot of useful information (e.g. references to + * initial ramdisks). + * + * However, you have to inform the loader that you are also compliant to the + * specification, and (if required) instruct the loader to adjust specific + * settings (e.g. the graphics mode). + * + * For this purpose you have to configure the beginning of the kernel (the first + * 8192 bytes of the kernel binary) accordingly (see `compiler/section.ld`) -- + * this is were the boot loader will search for a magic header and parse the + * subsequent entries containing the desired system configuration. + * In StuBS these flags are set in `boot/multiboot/config.inc` and the header + * structure is generated in `boot/multiboot/header.asm`. + * + * The first step in your \ref startup_bsp() "kernel entry function" is saving + * the pointer to the struct with the information from the boot loader + * (transferred via register `ebx`) -- and \ref Multiboot provides you the + * interface to comfortably access its contents! + */ +namespace Multiboot { +/*! 
\brief Boot Module + * (also known as `initrd` = initial Ramdisk) + * + * \see [1.7 Boot modules]{@ref multiboot} + * \see [3.3 Boot information format]{@ref multiboot} + */ +class Module { + uint32_t start; ///< Start address + uint32_t end; ///< End address (excluded) + uint32_t cmdline; ///< commandline parameter + uint32_t pad [[maybe_unused]]; ///< alignment; must be 0 + + public: + /*! \brief Get start of this boot module + * \return Pointer to begin of modules physical address + */ + void* getStartAddress() const { + return reinterpret_cast(static_cast(start)); + } + + /*! \brief Get end of this boot module + * \return Pointer beyond the modules physical address + */ + void* getEndAddress() const { + return reinterpret_cast(static_cast(end)); + } + + /*! \brief Get the size of this boot module + * \return Module size in bytes (difference of end and start address) + */ + size_t getSize() const { return static_cast(end - start); } + + /*! \brief Get the command line for this module + * \return pointer to zero terminated string + */ + char* getCommandLine() const { + return reinterpret_cast(static_cast(cmdline)); + } +} __attribute__((packed)); +assert_size(Module, 16); + +/*! \brief Retrieve a certain boot module + * \param i boot module number + * \return Pointer to structure with boot module information + */ +Module* getModule(unsigned i); + +/*! \brief Get the number of modules + * \return Pointer to structure with boot module information + */ +unsigned getModuleCount(); + +/*! \brief Get the kernel command line + * \return pointer to zero terminated string + */ +char* getCommandLine(); + +/*! \brief Get the name of the boot loader + * \return pointer to zero terminated string + */ +char* getBootLoader(); + +/*! \brief Memory Map + * + * The boot loader queries the BIOS for a memory map and stores its result in + * (something like) a linked list. 
However, this list may not be complete, + * can have contradictory entries and does not take the location of your kernel + * or any boot modules into account. + * (Anyways, it is still the best memory map you will have in StuBS...) + * + * \note Needs to be enabled explicitly by setting the `MULTIBOOT_MEMORY_INFO` + * flag in the multiboot header (see `boot/multiboot/config.inc`)! + * + * \see [Detecting Memory](https://wiki.osdev.org/Detecting_Memory_(x86)) + */ +class Memory { + uint32_t size; ///< Size of this entry (can exceed size of the class, rest + ///< will be padding bits) + uint64_t addr; ///< Begin of memory area + uint64_t len; ///< length of the memory area + + /*! \brief Usage Type + */ + enum Type : uint32_t { + AVAILABLE = 1, ///< Memory is available and usable in kernel + RESERVED = 2, ///< Memory is reserved (without further explanation) + ACPI = 3, ///< Memory may be reclaimed by ACPI + NVS = 4, ///< Memory is non volatile storage for ACPI + BADRAM = 5 ///< Area contains bad memory + } type; + + public: + /*! \brief Get start of this memory area + * \return Pointer to begin of the physical address of the memory area + */ + void* getStartAddress() const; + + /*! \brief Get end of this memory area + * \return Pointer beyond the physical address of this memory area + */ + void* getEndAddress() const; + + /*! \brief Is the memory marked as usable + * \return `true` if available, `false` if not usable. + */ + bool isAvailable() const; + + /*! \brief Get the next memory area + * \return pointer to the next memory area entry + */ + Memory* getNext() const; +} __attribute__((packed)); +assert_size(Memory, 24); + +/*! \brief Retrieve the first entry of the memory map + */ +Memory* getMemoryMap(); + +/*! 
\brief Video mode: Vesa BIOS Extension + * + * \see [VESA BIOS Extension (VBE) Core Functions (Version 3)](vbe3.pdf) + */ +struct VBE { + uint32_t control_info; ///< Pointer to VBE control information + uint32_t mode_info; ///< Pointer to VBE mode information + uint16_t mode; ///< Selected video mode (as defined in the standard) + uint16_t interface_seg; ///< Protected mode interface (unused) + uint16_t interface_off; ///< Protected mode interface (unused) + uint16_t interface_len; ///< Protected mode interface (unused) +} __attribute__((packed)); +assert_size(VBE, 16); + +/*! \brief Get pointer to Vesa BIOS Extension information + * + * \note Only available if the `MULTIBOOT_VIDEO_MODE` flag was explicitly set + * in the multiboot header (see `boot/multiboot/config.inc`)! + */ +VBE* getVesaBiosExtensionInfo(); + +/*! \brief Video mode: Framebuffer + * + * This beautiful structure contains everything required for using the graphic + * framebuffer in a very handy manner -- however, it may not be well supported + * by current boot loaders... + * These information can be retrieved from \ref VBE as well, though you then + * have to parse these huge structures containing a lot of useless stuff. + */ +struct Framebuffer { + uint64_t address; ///< Physical address of the framebuffer + uint32_t pitch; ///< Number of bytes per row + uint32_t width; ///< Width of framebuffer + uint32_t height; ///< Height of framebuffer + uint8_t bpp; ///< Bits per pixel + enum Type : uint8_t { + INDEXED = 0, ///< Using a custom color palette + RGB = 1, ///< Standard red-green-blue + EGA_TEXT = 2 ///< Enhanced Graphics Adapter color palette + } type; + union { + /*! \brief For INDEXED type + */ + struct { + uint32_t palette_addr; ///< Address of an array with RGB values + uint16_t palette_num_colors; ///< Number of colors (in array above) + } __attribute__((packed)); + + /*! 
\brief For RGB type + */ + struct { + uint8_t offset_red; ///< Offset of red value + uint8_t bits_red; ///< Bits used in red value + uint8_t offset_green; ///< Offset of green value + uint8_t bits_green; ///< Bits used in green value + uint8_t offset_blue; ///< Offset of blue value + uint8_t bits_blue; ///< Bits used in blue value + } __attribute__((packed)); + } __attribute__((packed)); +} __attribute__((packed)); +assert_size(Framebuffer, 28); + +/*! \brief Get pointer to framebuffer information + * + * \note Only available if the `MULTIBOOT_VIDEO_MODE` flag was explicitly set + * in the multiboot header (see `boot/multiboot/config.inc`)! + */ +Framebuffer* getFramebufferInfo(); +} // namespace Multiboot diff --git a/boot/multiboot/header.asm b/boot/multiboot/header.asm new file mode 100644 index 0000000..70f5117 --- /dev/null +++ b/boot/multiboot/header.asm @@ -0,0 +1,33 @@ +; The first 8192 bytes of the kernel binary must contain a header with +; predefined (and sometimes "magic") values according to the Multiboot standard. +; Based on these values, the boot loader decides whether and how to load the +; kernel -- which is compiled and linked into an ELF file. +; To make this possible with your StuBS kernel, the linker places the following +; entry `multiboot_header` at the very beginning of the file thanks to the +; linker script (located in compiler/sections.ld). 
+ +[SECTION .multiboot_header] + +; Include configuration +%include 'boot/multiboot/config.inc' + +; Multiboot Header +align 4 +multiboot_header: + dd MULTIBOOT_HEADER_MAGIC_OS ; Magic Header Value + dd MULTIBOOT_HEADER_FLAGS ; Flags (affects following entries) + dd MULTIBOOT_HEADER_CHKSUM ; Header Checksum + + ; Following fields would have been required to be defined + ; if flag A_OUT KLUDGE was set (but we don't need this) + dd 0 ; Header address + dd 0 ; Begin of load address + dd 0 ; end of load address + dd 0 ; end of bss segment + dd 0 ; address of entry function + + ; Following fields are required for video mode (flag MULTIBOOT_VIDEO_MODE) + dd 0 ; Mode: 0 = Graphic / 1 = Text + dd MULTIBOOT_VIDEO_WIDTH ; Width (pixels / columns) + dd MULTIBOOT_VIDEO_HEIGHT ; Height (pixels / rows) + dd MULTIBOOT_VIDEO_BITDEPTH ; color depth / number of colors diff --git a/boot/startup.asm b/boot/startup.asm new file mode 100644 index 0000000..4edb814 --- /dev/null +++ b/boot/startup.asm @@ -0,0 +1,77 @@ +; This is the actual entry point of the kernel. +; The switch into the 32-bit 'Protected Mode' has already been performed +; (by the boot loader). +; The assembly code just performs the absolute necessary steps (like setting up +; the stack) to be able to jump into the C++ code -- and continue further +; initialization in a (more) high-level language. 
+ +[BITS 32] + +; External functions and variables +[EXTERN CPU_CORE_STACK_SIZE] ; Constant containing the initial stack size (per CPU core), see `arch/core.cc` +[EXTERN cpu_core_stack_pointer] ; Pointer to reserved memory for CPU core stacks, see `arch/core.cc` +[EXTERN gdt_protected_mode_pointer] ; Pointer to 32 Bit Global Descriptor Table (located in `arch/gdt.cc`) +[EXTERN long_mode] ; Low level function to jump into the 64-bit mode ('Long Mode', see `boot/longmode.asm`) +[EXTERN multiboot_addr] ; Variable, in which the Pointer to Multiboot information + ; structure should be stored (`boot/multiboot/data.cc`) + +; Load Multiboot settings +%include "boot/multiboot/config.inc" + +[SECTION .text] + +; Entry point for the bootstrap processor (CPU0) +[GLOBAL startup_bsp] +startup_bsp: + ; Check if kernel was booted by a Multiboot compliant boot loader + cmp eax, MULTIBOOT_HEADER_MAGIC_LOADER + jne skip_multiboot + ; Pointer to Multiboot information structure has been stored in ebx by the + ; boot loader -- copy to a variable for later usage. 
+ mov [multiboot_addr], ebx + +skip_multiboot: + ; Disable interrupts + cli + ; Disable non maskable interrupts (NMI) + ; (we are going to ignore them) + mov al, 0x80 + out 0x70, al + + jmp load_cs + +; Segment initialization +; (code used by bootstrap and application processors as well) +[GLOBAL segment_init] +segment_init: + ; Load temporary protected mode Global Descriptor Table (GDT) + lgdt [gdt_protected_mode_pointer] + + ; Initialize segment register + mov ax, 0x10 + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + + ; Load code segment register + jmp 0x8:load_cs + +load_cs: + ; Initialize stack pointer: + ; Atomic increment of `cpu_core_stack_pointer` by `CPU_CORE_STACK_SIZE` + ; (to avoid race conditions at application processor boot) + mov eax, [CPU_CORE_STACK_SIZE] + lock xadd [cpu_core_stack_pointer], eax + ; Since the stack grows into the opposite direction, + ; Add `CPU_CORE_STACK_SIZE` again + add eax, [CPU_CORE_STACK_SIZE] + ; Assign stack pointer + mov esp, eax + + ; Clear direction flag for string operations + cld + + ; Switch to long mode (64 bit) + jmp long_mode diff --git a/boot/startup.cc b/boot/startup.cc new file mode 100644 index 0000000..16893c5 --- /dev/null +++ b/boot/startup.cc @@ -0,0 +1,73 @@ +#include "startup.h" + +#include "../arch/acpi.h" +#include "../arch/apic.h" +#include "../arch/core.h" +#include "../arch/idt.h" +#include "../arch/pic.h" +#include "../compiler/libc.h" +#include "../debug/output.h" +#include "../interrupt/handlers.h" + +/*! 
\brief The first processor is the Bootstrap Processor (BSP)
+ */
+static bool isBootstrapProcessor = true;
+
+extern "C" [[noreturn]] void kernel_init() {
+  if (isBootstrapProcessor) {
+    isBootstrapProcessor = false;
+    // Setup and load Interrupt Descriptor Table (IDT)
+    initInterruptHandlers();
+
+    // Initialize PICs
+    PIC::initialize();
+
+    // Call global constructors
+    CSU::initializer();
+
+    // Initialize ACPI
+    if (!ACPI::init()) {
+      DBG_VERBOSE << "No ACPI!" << endl;
+      Core::die();
+    }
+    // Initialize APIC (using ACPI)
+    if (!APIC::init()) {
+      DBG_VERBOSE << "APIC Initialization failed" << endl;
+      Core::die();
+    }
+
+    // Initialize the Bootstrap Processor
+    Core::init();
+
+    // Go to main function
+    main();
+
+    // Exit CPU
+    DBG_VERBOSE << "CPU core " << Core::getID() << " (BSP) shutdown." << endl;
+    Core::exit();
+  } else {
+    // Load Interrupt Descriptor Table (IDT)
+    IDT::load();
+
+    // Initialize this application processor
+    Core::init();
+
+    // And call the AP main
+    main_ap();
+
+    // Exit CPU
+    DBG_VERBOSE << "CPU core " << Core::getID() << " (AP) shutdown." << endl;
+    Core::exit();
+  }
+
+  // Only on last core
+  if (Core::countOnline() == 1) {
+    // Call global destructors
+    CSU::finalizer();
+  }
+
+  // wait forever
+  while (true) {
+    Core::die();
+  }
+}
diff --git a/boot/startup.h b/boot/startup.h
new file mode 100644
index 0000000..34436ff
--- /dev/null
+++ b/boot/startup.h
@@ -0,0 +1,50 @@
+/*! \file
+ * \brief Startup of the first core, also known as bootstrap processor (BSP)
+ */
+#pragma once
+#include "../compiler/fix.h"
+#include "../types.h"
+
+/*! \brief Entry point of your kernel
+ *
+ * \ingroup Startup
+ *
+ * Executed by boot loader.
+ * Stores Pointer to \ref Multiboot information structure,
+ * initializes stack pointer,
+ * switches to long mode
+ * and finally calls the C++ \ref kernel_init function
+ */
+extern "C" void startup_bsp() ERROR_ON_CALL(
+    "The kernel entry point shall never be called from your code!");
+
+/*! 
\brief Initializes the C++ environment and detects system components + * + * \ingroup Startup + * + * The startup code(both for \ref startup_bsp "bootstrap" and \ref startup_ap + * "application processor") jumps to this high level function. After + * initialization it will call \ref main() + */ +/*! or \ref main_ap() respectively + */ +extern "C" [[noreturn]] void kernel_init() ERROR_ON_CALL( + "The kernel init function shall never be called from your code!"); + +/*! \brief Kernels main function + * + * Called after initialization of the system by \ref kernel_init() + */ +/*! \note This code will only be executed on the booting CPU (i.e., the one with + * ID 0). + */ +extern "C" int main(); + +/*! \brief Entry point for application processors + * + * Called after initialization of the system by \ref kernel_init() + * + * \note Code in this function will be executed on all APs (i.e., all CPUs + * except ID 0) + */ +extern "C" int main_ap(); diff --git a/boot/startup_ap.asm b/boot/startup_ap.asm new file mode 100644 index 0000000..7f824eb --- /dev/null +++ b/boot/startup_ap.asm @@ -0,0 +1,68 @@ + +; Startup of the remaining application processors (in real mode) +; and switching to 'Protected Mode' with a temporary GDT. +; This code is relocated by ApplicationProcessor::relocateSetupCode() + +[SECTION .setup_ap_seg] +[GLOBAL setup_ap_gdt] +[GLOBAL setup_ap_gdtd] + +; Unlike the bootstrap processor, the application processors have not been +; set up by the boot loader -- they start in real mode (16 bit) and have to be +; switched manually to protected mode (32 bit) +[BITS 16] + +setup_ap: + ; Initialize segment register + mov ax, cs ; Code segment and... + mov ds, ax ; .. 
data segment should point to the same segment + ; (we don't use stack / stack segment) + + ; Disable interrupts + cli + ; Disable non maskable interrupts (NMI) + mov al, 0x80 + out 0x70, al + + ; load temporary real mode Global Descriptor Table (GDT) + lgdt [setup_ap_gdtd - setup_ap] + + ; Switch to protected mode: + ; enable protected mode bit (1 << 0) in control register 0 + mov eax, cr0 + or eax, 1 + mov cr0, eax + ; Far jump to 32 bit `startup_ap` function + jmp dword 0x08:startup_ap + +; memory reserved for temporary real mode GDT +; initialized by ApplicationProcessor::relocateSetupCode() +align 4 +setup_ap_gdt: + dq 0,0,0,0,0 ; reserve memory for at least 5 GDT entries + +; memory reserved for temporary real mode GDT descriptor +; initialized by ApplicationProcessor::relocateSetupCode() +setup_ap_gdtd: + dw 0,0,0,0,0 ; reserve memory for GDT descriptor + +[SECTION .text] + +[BITS 32] + +; Segment initialization defined in `boot/startup.asm` +[EXTERN segment_init] + +; protected mode (32 bit) startup code for application processor +startup_ap: + ; reload all segment selectors (since they still point to the real mode GDT) + mov ax, 0x10 + mov ds, ax + mov es, ax + mov fs, ax + mov gs, ax + mov ss, ax + + ; Use same segment initialization function as bootstrap processor + jmp segment_init + diff --git a/boot/startup_ap.cc b/boot/startup_ap.cc new file mode 100644 index 0000000..9913b47 --- /dev/null +++ b/boot/startup_ap.cc @@ -0,0 +1,82 @@ +#include "startup_ap.h" + +#include "../arch/core_interrupt.h" +#include "../arch/gdt.h" +#include "../arch/lapic.h" +#include "../arch/pit.h" +#include "../debug/assert.h" +#include "../debug/output.h" +#include "../utils/size.h" +#include "../utils/string.h" + +namespace ApplicationProcessor { + +// Make sure that the RELOCATED_SETUP is in low memory (< 1 MiB) +static_assert((RELOCATED_SETUP & ~0x000ff000) == 0, + "Not a valid 1 MB address for RELOCATED_SETUP!"); + +/*! 
\brief Temporary Global Descriptor Table
+ *
+ * Blue print, to be copied into real mode code
+ */
+constinit GDT::SegmentDescriptor ap_gdt[] = {
+    // null descriptor
+    {},
+
+    // XXX: Can't we just use GDT::protected_mode?
+    // code segment
+    GDT::SegmentDescriptor::Segment(0, UINT32_MAX, true, 0, GDT::SIZE_32BIT),
+
+    // data segment
+    GDT::SegmentDescriptor::Segment(0, UINT32_MAX, false, 0, GDT::SIZE_32BIT),
+};
+
+void relocateSetupCode() {
+  // Copy the setup code to its low memory destination
+  memcpy(reinterpret_cast<void*>(RELOCATED_SETUP), &___SETUP_AP_START__,
+         &___SETUP_AP_END__ - &___SETUP_AP_START__);
+
+  // Adjust GDT:
+  // Calculate offset for real mode GDT and GDT descriptor
+  uintptr_t ap_gdt_offset = reinterpret_cast<uintptr_t>(&setup_ap_gdt) -
+                            reinterpret_cast<uintptr_t>(&___SETUP_AP_START__);
+  uintptr_t ap_gdtd_offset = reinterpret_cast<uintptr_t>(&setup_ap_gdtd) -
+                             reinterpret_cast<uintptr_t>(&___SETUP_AP_START__);
+
+  // Copy blue print of real mode GDT to the relocated memory
+  void* relocated_ap_gdt =
+      reinterpret_cast<void*>(RELOCATED_SETUP + ap_gdt_offset);
+  memcpy(relocated_ap_gdt, &ap_gdt, sizeof(ap_gdt));
+
+  // Calculate GDT descriptor for relocated address
+  GDT::Pointer* relocated_ap_gdtd =
+      reinterpret_cast<GDT::Pointer*>(RELOCATED_SETUP + ap_gdtd_offset);
+  relocated_ap_gdtd->set(relocated_ap_gdt, size(ap_gdt));
+}
+
+void boot(void) {
+  assert(!Core::Interrupt::isEnabled() &&
+         "Interrupts should not be enabled before APs have booted!");
+
+  // Relocate setup code
+  relocateSetupCode();
+
+  // Calculate Startup-IPI vector based on address of relocated setup_ap()
+  uint8_t vector = RELOCATED_SETUP >> 12;
+
+  // Send Init-IPI to all APs
+  LAPIC::IPI::sendInit();
+
+  // wait at least 10ms
+  PIT::delay(10000);
+
+  // Send Startup-IPI twice
+  DBG_VERBOSE << "Sending STARTUP IPI #1" << endl;
+  LAPIC::IPI::sendStartup(vector);
+  // wait at least 200us
+  PIT::delay(200);
+
+  DBG_VERBOSE << "Sending STARTUP IPI #2" << endl;
+  LAPIC::IPI::sendStartup(vector);
+}
+}  // namespace ApplicationProcessor
diff --git a/boot/startup_ap.h 
b/boot/startup_ap.h new file mode 100644 index 0000000..dbd3c01 --- /dev/null +++ b/boot/startup_ap.h @@ -0,0 +1,114 @@ +/*! \file + * \brief Startup of additional cores, the application processors (APs) + */ + +#pragma once +#include "../compiler/fix.h" +#include "../types.h" + +/*! \brief Application Processor Boot + * + * Interface to boot the APs + */ +namespace ApplicationProcessor { +/*! \brief Address (below 1 MiB) to which the setup code gets relocated + */ +constexpr uintptr_t RELOCATED_SETUP = 0x40000; + +/*! \brief Relocate the real mode setup code + * + * The application processors (APs) start in real mode, which means that your + * setup code must be placed within the first megabyte -- your operating system + * resides currently at a much higher address (16 MiB), so the code has to be + * copied down there first. + * + * Luckily, the code in `setup_ap()` can be relocated by copying -- because it + * does not use any absolute addressing (except when jumping to the protected + * mode function `startup_ap()`). + * The function must be copied to the address of \ref RELOCATED_SETUP (0x40000), + * so that the APs can start there. + * + * The memory section contains a reserved area for the \ref GDT and its + * descriptor, which has to be assigned first with the contents of \ref ap_gdt. + * + * \note You could also tell the linker script to put the code directly + * at the appropriate place, but unfortunately the Qemu multiboot + * implementation (via `-kernel` parameter) can't handle it properly. + */ +void relocateSetupCode(); + +/*! \brief Boot all application processors + * + * Performs relocation by calling \ref relocateSetupCode() + * + * \see [ISDMv3, 8.4.4.2 Typical AP Initialization + * Sequence](intel_manual_vol3.pdf#page=276) + */ +void boot(); +} // namespace ApplicationProcessor + +/*! 
\brief Begin of setup code for application processors + * + * The setup code has to switch from real mode (16 bit) to protected mode (32 + * bit), hence it is written in assembly and must be executed in low memory (< 1 + * MiB). + * + * After kernel start the code is somewhere above 16 MiB (the bootstrap + * processor was already launched in protected mode by the boot loader). + * Therefore this symbol is required to relocate the code to the position + * specified by \ref ApplicationProcessor::RELOCATED_SETUP. + * + * Luckily, the `setup_ap` code in `boot/startup_ap.asm` is rather simple and + * doesn't depend on absolute addressing -- and is therefore relocatable. + * + * Relocation is done by the function \ref + * ApplicationProcessor::relocateSetupCode() + * + * The `___SETUP_AP_START__` symbol is defined in the linker script + * (`compiler/sections.ld`) + */ +extern char ___SETUP_AP_START__; + +/*! \brief End of startup code for application processors + * + * This symbol is defined in the linker script (`compiler/sections.ld`) + */ +extern char ___SETUP_AP_END__; + +/*! \brief Memory reserved for a temporary real mode GDT + * within the relocatable memory area of the setup code + */ +extern char setup_ap_gdt; + +/*! \brief Memory reserved for a temporary real mode GDT descriptor + * within the relocatable memory area of the setup code + */ +extern char setup_ap_gdtd; + +/*! \brief Entry point for application processors + * + * Unlike the bootstrap processor, the application processors have not been + * set up by the boot loader -- they start in `Real Mode` (16 bit) and have to be + * switched manually to `Protected Mode` (32 bit). 
+ * This is exactly what this real mode function does, handing over control
+ * to the (32 bit) function \ref startup_ap()
+ *
+ * This code is written in assembly (`boot/startup_ap.asm`) and relocated by
+ * \ref ApplicationProcessor::relocateSetupCode() during
+ * \ref ApplicationProcessor::boot()
+ */
+extern "C" void setup_ap() ERROR_ON_CALL(
+    "The setup function for application processors shall never be called from "
+    "your code!");
+
+/*! \brief Startup for application processors
+ * \ingroup Startup
+ *
+ * This function behaves similarly to \ref startup_bsp():
+ * Initializes stack pointer,
+ * switches to long mode
+ * and calls the C++ \ref kernel_init function
+ */
+extern "C" void startup_ap() ERROR_ON_CALL(
+    "The startup function for application processors shall never be called "
+    "from your code!");
diff --git a/compiler/fix.h b/compiler/fix.h
new file mode 100644
index 0000000..a50e918
--- /dev/null
+++ b/compiler/fix.h
@@ -0,0 +1,13 @@
+/*! \file
+ * \brief Compiler-dependent fixes & idiosyncrasies
+ */
+
+#pragma once
+#include "../types.h"
+
+#if defined(__GNUC__) && !defined(__clang__)
+// Only GCC understands the error attribute; the declaration adds the final ';'
+#define ERROR_ON_CALL(MSG) __attribute__((error(MSG)))
+#else
+#define ERROR_ON_CALL(MSG)
+#endif
diff --git a/compiler/libc.cc b/compiler/libc.cc
new file mode 100644
index 0000000..6bbd0ef
--- /dev/null
+++ b/compiler/libc.cc
@@ -0,0 +1,45 @@
+#include "libc.h"
+
+/*! \brief Function pointer for initialization/finalization functions for global
+ * objects required since GCC 4.7 and later. 
+ *
+ * These symbols appear kind of magically due to the compiler
+ */
+extern void (*__preinit_array_start[])();
+extern void (*__preinit_array_end[])();
+extern void (*__init_array_start[])();
+extern void (*__init_array_end[])();
+extern void (*__fini_array_start[])();
+extern void (*__fini_array_end[])();
+
+namespace CSU {
+
+void initializer() {
+  const size_t preinit_size = __preinit_array_end - __preinit_array_start;
+  for (size_t i = 0; i < preinit_size; ++i) {
+    (*__preinit_array_start[i])();
+  }
+
+  const size_t init_size = __init_array_end - __init_array_start;
+  for (size_t i = 0; i < init_size; ++i) {
+    (*__init_array_start[i])();
+  }
+}
+
+void finalizer() {
+  const size_t fini_size = __fini_array_end - __fini_array_start;
+  for (size_t i = fini_size; i-- > 0;) {  // last entry first (reverse order)
+    (*__fini_array_start[i])();
+  }
+}
+
+}  // namespace CSU
+
+extern "C" int atexit(void (*func)(void)) {
+  // Registers a function that will be executed on exit.
+  // We simply ignore those functions, as we don't need them for our operating
+  // systems.
+  (void)func;
+
+  return 0;
+}
diff --git a/compiler/libc.h b/compiler/libc.h
new file mode 100644
index 0000000..dad1d44
--- /dev/null
+++ b/compiler/libc.h
@@ -0,0 +1,23 @@
+/*! \file
+ * \brief Initialization functions for global objects required by the compiler
+ */
+
+#pragma once
+#include "../types.h"
+
+/*! \brief C StartUp (CSU)
+ * required by the compiler and provided by the c standard library
+ */
+namespace CSU {
+
+/*! \brief Call the preinit and init array entries (global constructors) in
+ * array order (this is usually done by __libc_csu_init)
+ */
+void initializer();
+
+/*! \brief Call the fini array entries (global destructors) in reverse array
+ * order (this is usually done by __libc_csu_fini)
+ */
+void finalizer();
+
+}  // namespace CSU
diff --git a/compiler/libcxx.cc b/compiler/libcxx.cc
new file mode 100644
index 0000000..a2d3755
--- /dev/null
+++ b/compiler/libcxx.cc
@@ -0,0 +1,21 @@
+/*! 
\file + * \brief C++ runtime support functions + */ + +#include "../types.h" + +void* operator new(size_t, void* place) { return place; } + +void operator delete(void* ptr) { (void)ptr; } + +void operator delete(void* ptr, size_t size) { + (void)ptr; + (void)size; +} + +extern "C" [[noreturn]] void __cxa_pure_virtual() { + // Pure virtual function was called -- this if obviously not valid, + // therefore we wait infinitely. + while (true) { + } +} diff --git a/compiler/sections.ld b/compiler/sections.ld new file mode 100644 index 0000000..843fbb7 --- /dev/null +++ b/compiler/sections.ld @@ -0,0 +1,107 @@ +/* Entry in our OS -- label 'startup_bsp' in file boot/startup.asm */ +ENTRY(startup_bsp) + +SECTIONS + { + /* start address of our kernel */ + . = 16M; + + ___KERNEL_START___ = .; + + .boot : + { + /* Multiboot Header should be at the very beginning */ + *(.multiboot_header) + } + + ___KERNEL_TEXT_START___ = .; + + .text : + { + *(".text") + *(".text$") + *(".init") + *(".fini") + *(".gnu.linkonce.*") + KEEP(*(.note.gnu.build-id)) + } + + /* lists containing the start address of global constructors and destructors (generated by the compiler) */ + .preinit_array : + { + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + } + .init_array : + { + PROVIDE_HIDDEN (__init_array_start = .); + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + } + .fini_array : + { + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP (*(SORT(.fini_array.*))) + KEEP (*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + } + + ___KERNEL_TEXT_END___ = .; + + .data : + { + *(".data") + *(".data$") + *(".rodata") + ___CTOR_LIST__ = .; + *(".ctors") + *(".ctor") + ___CTOR_LIST_END__ = .; + ___DTOR_LIST__ = .; + *(".dtors") + *(".dtor") + ___DTOR_LIST_END__ = .; + *(".got") + *(".got.plt") + *(".eh_frame") + *(".eh_fram") + *(".jcr") + } + + /* Start for application processors, 
relocated by ApplicationProcessor::relocateSetupCode() + * to a below 1 MB address to boot from real mode. + * It is possible to let the linker place it at a below 1 MB address, + * while all the rest starts at 16 MB. This will work for multiboot + * compliant boot loader like GRUB and PXELINUX, however, + * the qemu boot loader cannot handle such ELF files (yet)... + * That's why we have to do it in our software */ + .setup_ap_seg ALIGN(0x10) : + { + ___SETUP_AP_START__ = .; + *(".setup_ap_seg") + *(".setup_ap_seg$") + } + ___SETUP_AP_END__ = .; + + .bss : + { + *(".bss") + *(".bss.*") + *(COMMON) + } + ___KERNEL_END___ = .; + + /DISCARD/ : + { + *(".note") + *(".comment") +/* Keep debug information + *(".debug_line") + *(".debug_info") + *(".debug_abbrev") + *(".debug_aranges") +*/ + } + } diff --git a/debug/assert.cc b/debug/assert.cc new file mode 100644 index 0000000..96dd7c3 --- /dev/null +++ b/debug/assert.cc @@ -0,0 +1,14 @@ +#include "assert.h" + +[[noreturn]] void assertion_failed(const char* exp, const char* func, + const char* file, int line) { + (void)exp; + (void)func; + (void)file; + (void)line; + // TODO: Print error message (in debug window) + // TODO: Then stop the current core permanently + // Use appropriate method from class Core to do so. + while (true) { + } // wait forever so we can mark this as [[noreturn]] +} diff --git a/debug/assert.h b/debug/assert.h new file mode 100644 index 0000000..2e30173 --- /dev/null +++ b/debug/assert.h @@ -0,0 +1,75 @@ +// vim: set noet ts=4 sw=4: + +/*! \file + * \brief Contains several macros usable for making assertions + * + * Depending on the type of assertion (either static or at runtime), a failing + * assertion will trigger an error. For static assertion, this error will be + * shown at compile time and abort compilation. Runtime assertions will trigger + * a message containing details about the error occurred and will make the CPU + * die. + */ + +/*! 
+ * \defgroup debug Debugging functions + */ + +#pragma once +#include "../types.h" + +#ifndef STRINGIFY +/*! \def STRINGIFY(S) + * \brief Converts a macro parameter into a string + * \ingroup debug + * \param S Expression to be converted + * \return stringified version of S + */ +#define STRINGIFY(S) #S +#endif + +/*! \def assert_size(TYPE, SIZE) + * \brief Statically ensure (at compile time) that a data type (or variable) + * has the expected size. + * + * \ingroup debug + * \param TYPE The type to be checked + * \param SIZE Expected size in bytes + */ +#define assert_size(TYPE, SIZE) \ + static_assert(sizeof(TYPE) == (SIZE), "Wrong size for " STRINGIFY(TYPE)) + +/*! \def assert(EXP) + * \brief Ensure (at execution time) an expression evaluates to `true`, print + * an error message and stop the CPU otherwise. + * + * \ingroup debug + * \param EXP The expression to be checked + */ +#ifdef NDEBUG +#define assert(EXP) ((void)0) +#else +#define assert(EXP) \ + do { \ + if (__builtin_expect(!(EXP), 0)) { \ + assertion_failed(STRINGIFY(EXP), __func__, __FILE__, __LINE__); \ + } \ + } while (false) + +/*! \brief Handles a failed assertion + * + * This function will print a message containing further information about the + * failed assertion and stops the current CPU permanently. + * + * \note This function should never be called directly, but only via the macro + * `assert`. + * + * \todo(11) Implement Remainder of Method (output & CPU stopping) + * + * \param exp Expression that did not hold + * \param func Name of the function in which the assertion failed + * \param file Name of the file in which the assertion failed + * \param line Line in which the assertion failed + */ +[[noreturn]] void assertion_failed(const char* exp, const char* func, + const char* file, int line); +#endif diff --git a/debug/copystream.h b/debug/copystream.h new file mode 100644 index 0000000..4e9d521 --- /dev/null +++ b/debug/copystream.h @@ -0,0 +1,44 @@ +/*! 
\file
+ * \brief \ref CopyStream duplicates \ref OutputStream "output streams"
+ */
+
+#pragma once
+#include "../object/outputstream.h"
+#include "../types.h"
+
+/*! \brief Duplicate all data passed by the stream operator to two \ref
+ * OutputStream "output streams"
+ * \ingroup io
+ *
+ * Can be used as replacement for any \ref OutputStream -- for example,
+ * forwarding the \ref DBG output simultaneously to screen (\ref TextStream) and
+ * serial console (\ref SerialStream).
+ *
+ */
+class CopyStream : public OutputStream {
+  /*! \brief First recipient
+   */
+  OutputStream* first;
+
+  /*! \brief Second recipient
+   */
+  OutputStream* second;
+
+ public:
+  /*! \brief Constructor
+   *
+   * \param first First recipient for output passed to this object
+   * \param second Second recipient for output passed to this object
+   */
+  CopyStream(OutputStream* first, OutputStream* second)
+      : first(first), second(second) {}
+
+  /*! \brief Redirect the buffer to both streams and flush them, too.
+   */
+  void flush() override {
+    buffer[pos] = '\0';  // make sure buffer will only be printed until pos.
+    *first << buffer << ::flush;
+    *second << buffer << ::flush;
+    pos = 0;
+  }
+};
diff --git a/debug/kernelpanic.h b/debug/kernelpanic.h
new file mode 100644
index 0000000..f83fbd8
--- /dev/null
+++ b/debug/kernelpanic.h
@@ -0,0 +1,27 @@
+// vim: set noet ts=4 sw=4:
+
+/*! \file
+ * \brief Macro to print an error message and stop the current core.
+ */
+
+#pragma once
+#include "../types.h"
+
+/*! \def kernelpanic
+ * \brief Print an error message in the debug window and \ref Core::die "stop
+ * the current core"
+ *
+ * \param MSG error message
+ * \ingroup debug
+ */
+#define kernelpanic(MSG)                                                   \
+  do {                                                                     \
+    DBG << "PANIC: '" << (MSG) << "' in " << __func__ << " @ " << __FILE__ \
+        << ":" << __LINE__ << " - CPU stopped." << endl;                  \
+    Core::die();                                                           \
+  } while (0)
+
+// The includes are intentionally placed at the end, so the macro can be used
+// inside those included files as well. 
+#include "../arch/core.h"
+#include "./output.h"
diff --git a/debug/nullstream.cc b/debug/nullstream.cc
new file mode 100644
index 0000000..1cba8b9
--- /dev/null
+++ b/debug/nullstream.cc
@@ -0,0 +1,4 @@
+#include "nullstream.h"
+
+// Instance
+NullStream nullstream;
diff --git a/debug/nullstream.h b/debug/nullstream.h
new file mode 100644
index 0000000..e922dba
--- /dev/null
+++ b/debug/nullstream.h
@@ -0,0 +1,45 @@
+/*! \file
+ * \brief \ref NullStream is a stream discarding everything
+ */
+
+#pragma once
+#include "../object/outputstream.h"
+#include "../types.h"
+
+/*! \brief Ignore all data passed by the stream operator
+ * \ingroup io
+ *
+ * Can be used instead of the \ref OutputStream if (for debugging reasons) all
+ * output should be ignored, e.g. for \ref DBG_VERBOSE
+ *
+ * By using template programming, a single generic method is sufficient
+ * (which simply discards everything).
+ */
+class NullStream {
+  /*! \brief Check if type is supported by output stream
+   */
+  template <typename T>
+  auto check(T v, OutputStream* p = nullptr) -> decltype(*p << v, void()) {}
+
+ public:
+  /*! \brief Empty default constructor
+   */
+  NullStream() {}
+
+  /*! \brief Generic stream operator for any data type
+   *
+   * Uses template meta programming for a generic & short solution
+   *
+   * \tparam T Type of data to ignore
+   * \param value data to be ignored
+   * \return Reference to the \ref NullStream object allowing concatenation of
+   *         operators
+   */
+  template <typename T>
+  NullStream& operator<<(T value) {
+    check(value);
+    return *this;
+  }
+};
+
+extern NullStream nullstream;
diff --git a/debug/output.h b/debug/output.h
new file mode 100644
index 0000000..4dc7530
--- /dev/null
+++ b/debug/output.h
@@ -0,0 +1,93 @@
+// vim: set noet ts=4 sw=4:
+
+/*! \file
+ * \brief Debug macros enabling debug output on a separate window for each
+ * core.
+ */
+
+#pragma once
+#include "../types.h"
+
+/*! 
\def DBG_VERBOSE + * \brief An output stream, which is only displayed in the debug window in + * verbose mode + * + * \note If a serial console has been implemented, the output can be redirected + * to the serial stream instead (by changing the macro) -- this makes the + * (usually) very large output more readable (since it allows scrolling + * back) + */ +#ifdef VERBOSE +// If VERBOSE is defined, forward everything to \ref DBG +#define DBG_VERBOSE DBG +#else +// Otherwise send everything to the NullStream (which will simply discard +// everything) +#define DBG_VERBOSE nullstream +// in this case we have to include the null stream +#include "./nullstream.h" +#endif + +/*! \def DBG + * \brief An output stream, which is displayed in the debug window of the core + * it was executed on + * + * In single core (\OOStuBS) this is just an alias to the debug window object + * `dout`. + */ +/*! However, on a multi core system a debug window for each core is + * required, therefore `dout` has to be a \ref TextStream object array with the + * core ID as array index -- the selection is done via Core::getID() + * + * \warning In case of a very unfavorable scheduling, it is theoretically + * possible that the debug output in a multi core system is displayed + * on the wrong (previous) core. + */ +#define DBG nullstream + +#include "../arch/core.h" +#include "../device/textstream.h" + +/*! \brief Debug window for the CGA screen + * + * Debug output using \ref DBG like + * `DBG << "var = " << var << endl` + * should be displayed in the window dedicated to the core it is executed on. + * + * While this is quite easy on single core systems like \OOStuBS -- they only + * require a single \ref TextStream object called `dout` -- multi core systems + * like \MPStuBS need an object array with one window per core.
+ * In the latter case direct list initialization can be used: + * + * \code{.cpp} + * TextStream dout[Core::MAX]{ + * {0, 40, 17, 21}, // Debug window for core 0, like TextStream(0, 40, 17, + * 21) {40, 80, 17, 21}, // Debug window for core 1, like TextStream(40, 80, + * 17, 21) + * //... + * }; + * \endcode + * + * The debug windows should be located right below the normal output window + * without any overlap and should be able to display at least 3 lines. + * In \MPStuBS, two windows can be placed side-by-side, having 40 columns each. + * + * \todo(11) Define `dout` + */ +extern TextStream dout[Core::MAX]; + +/*! \brief Debug window with copy function to serial + * + * Provide an additional layer to also output debug prints to serial. + * While this is a simple CopyStream pointer in the single core case, it is + * an array in the multi core case, which consists of three TextStreams and + * one CopyStream. + * For that, construction is done like: + * + * \code{.cpp} + * OutputStream* copyout[Core::MAX]{&dout[0], &dout[1], ...} + * \endcode + * + * \todo(11) Define `copyout` + */ +extern OutputStream* copyout[Core::MAX]; diff --git a/device/keydecoder.cc b/device/keydecoder.cc new file mode 100644 index 0000000..7911b42 --- /dev/null +++ b/device/keydecoder.cc @@ -0,0 +1,122 @@ +#include "keydecoder.h" + +#include "ps2controller.h" + +// Constants used for key decoding +const unsigned char BREAK_BIT = 0x80; +const unsigned char PREFIX_1 = 0xe0; +const unsigned char PREFIX_2 = 0xe1; + +Key KeyDecoder::decode(unsigned char code) { + Key key = modifier; + + // All keys that are introduced by the MF II keyboard (compared to the older + // AT keyboard) always send a prefix value as first byte. + if (code == PREFIX_1 || code == PREFIX_2) { + prefix = code; + } else { + // Releasing a key is, for us, only important for the modifier keys such as + // SHIFT, CTRL and ALT. For other, non-modifier keys, we ignore the break + // code.
+ bool pressed = (code & BREAK_BIT) == 0; + + // A key's break code is identical to its make code with an additionally set + // BREAK_BIT + Key::Scancode scancode = static_cast<Key::Scancode>(code & (~BREAK_BIT)); + + // We ignore "new" special keys, such as the Windows key + if (scancode < Key::Scancode::KEYS) { + // save state + status[scancode] = pressed; + + // Take a closer look at modifier make and break events + bool isModifier = true; + switch (scancode) { + // both shifts are handled equally + case Key::Scancode::KEY_LEFT_SHIFT: + case Key::Scancode::KEY_RIGHT_SHIFT: + modifier.shift = pressed; + break; + + case Key::Scancode::KEY_LEFT_ALT: + if (prefix == PREFIX_1) { + modifier.alt_right = pressed; + } else { + modifier.alt_left = pressed; + } + break; + + case Key::Scancode::KEY_LEFT_CTRL: + if (prefix == PREFIX_1) { + modifier.ctrl_right = pressed; + } else { + modifier.ctrl_left = pressed; + } + break; + + default: + isModifier = false; + } + + // For keys other than modifiers, we only care about the make code + if (pressed && !isModifier) { + switch (scancode) { + case Key::Scancode::KEY_CAPS_LOCK: + modifier.caps_lock ^= 1; + setLed(PS2Controller::LED_CAPS_LOCK, modifier.caps_lock); + break; + + case Key::Scancode::KEY_SCROLL_LOCK: + modifier.scroll_lock ^= 1; + setLed(PS2Controller::LED_SCROLL_LOCK, modifier.scroll_lock); + break; + + case Key::Scancode::KEY_NUM_LOCK: // Can be both NumLock and pause + // On old keyboards, the pause functionality was only accessible by + // pressing Ctrl+NumLock. Modern MF-II keyboards therefore send + // exactly this code combination when the pause key was pressed. + // Normally, the pause key does not provide an ASCII code, but we + // check that anyway. In either case, we're now done decoding.
+ if (modifier.ctrl_left) { // pause key + key.scancode = scancode; + } else { // NumLock + modifier.num_lock ^= 1; + setLed(PS2Controller::LED_NUM_LOCK, modifier.num_lock); + } + break; + + // Special case scan code 53: This code is used by both the minus key + // on the main keyboard and the division key on the number block. When + // the division key was pressed, we adjust the scancode accordingly. + case Key::Scancode::KEY_SLASH: + if (prefix == PREFIX_1) { + key.scancode = Key::Scancode::KEY_DIV; + key.shift = true; + } else { + key.scancode = scancode; + } + break; + + default: + key.scancode = scancode; + + // When NumLock is enabled and a key on the keypad was pressed, we + // want to return the ASCII and scan codes of the corresponding + // numerical key instead of the arrow keys. The keys on the cursor + // block (prefix == PREFIX_1), however, should remain usable. + // Therefore, as a little hack, we deactivate the NumLock for these + // keys. + if (modifier.num_lock && prefix == PREFIX_1) { + key.num_lock = false; + } + } + } + } + + // The prefix is only valid for the immediately following code, which was + // just handled. + prefix = 0; + } + + return key; +} diff --git a/device/keydecoder.h b/device/keydecoder.h new file mode 100644 index 0000000..c45d145 --- /dev/null +++ b/device/keydecoder.h @@ -0,0 +1,40 @@ +/*! \file + * \brief \ref KeyDecoder decodes a keystroke to the corresponding \ref Key + * object + */ + +#pragma once +#include "../object/key.h" +#include "../types.h" + +/*! \brief Decoder for \ref ps2keyboardset1 "keyboard codes" received from the + * \ref PS2Controller + * \ingroup io + * + * Extracts the make and break codes, modifier and scan codes from the pressed + * key. + */ +class KeyDecoder { + unsigned char prefix; ///< Prefix byte for keys + Key modifier; ///< activated modifier keys (e.g., caps lock) + + public: + /*! \brief Current state (pressed or released) of all keys. + */ + bool status[Key::Scancode::KEYS]; + + /*!
\brief Default constructor + */ + KeyDecoder() {} + + /*! \brief Interprets the \ref ps2keyboardset1 "make and break codes" + * received from the keyboard and derives the corresponding scan code and + * further information about other pressed keys, such as \key{shift} and + * \key{ctrl}. + * + * \param code Byte from Keyboard to decode + * \return Pressed key (\ref Key::valid returns `false` if the key is not yet + * complete) + */ + Key decode(unsigned char code); +}; diff --git a/device/ps2controller.cc b/device/ps2controller.cc new file mode 100644 index 0000000..11d46b1 --- /dev/null +++ b/device/ps2controller.cc @@ -0,0 +1,130 @@ +#include "ps2controller.h" + +#include "../arch/core_interrupt.h" +#include "../arch/ioport.h" +#include "../compiler/fix.h" +#include "../debug/output.h" +#include "keydecoder.h" + +namespace PS2Controller { + +// I/O Ports of the PS2 Controller +static const IOPort ctrl_port( + 0x64); ///< Access status- (read) and command (write) register +static const IOPort data_port(0x60); ///< Access PS/2 device [keyboard] output- + ///< (read) and input (write) buffer +/* The buffers are used to communicate with the controller or the connected + * PS/2 devices alike: + * - For the output buffer, the controller decides to which PS/2 device the + * data gets forwarded to -- by default it is the primary PS/2 device + * (keyboard). + * - The source device from which the data was gathered can be determined using + * the status flag (\ref IS_MOUSE). + * + * Please also note, that the naming of the buffer may be a bit contra-intuitive + * since it is the perspective of the PS/2 controller due to historical reasons. + */ + +// Key decoder (stores the state of the modifier keys) +static KeyDecoder key_decoder; + +// To store the current state of the Keyboard LEDs +static uint8_t leds = 0; + +/*! \brief Flags in the PS/2 controller status register + */ +enum Status { + HAS_OUTPUT = 1 << 0, ///< Output buffer non-empty? 
+ INPUT_PENDING = 1 << 1, ///< Is input buffer full? + SYSTEM_FLAG = 1 << 2, ///< set on soft reset, cleared on power up + IS_COMMAND = 1 << 3, ///< Is command Byte? (otherwise data) + IS_MOUSE = 1 << 5, ///< Mouse output has data + TIMEOUT_ERROR = 1 << 6, ///< Timeout error + PARITY_ERROR = 1 << 7 ///< Parity error +}; + +/*! \brief Commands to be send to the Keyboard + */ +enum KeyboardCommand : uint8_t { + KEYBOARD_SET_LED = + 0xed, ///< Set the LED (according to the following parameter byte) + KEYBOARD_SEND_ECHO = 0xee, ///< Send an echo packet + KEYBOARD_SET_SPEED = 0xf3, ///< Set the repeat rate (according to the + ///< following parameter byte) + KEYBOARD_ENABLE = 0xf4, ///< Enable Keyboard + KEYBOARD_DISABLE = 0xf5, ///< Disable Keyboard + KEYBOARD_SET_DEFAULT = 0xf6, ///< Load defaults +}; + +/*! \brief Replies + */ +enum Reply { + ACK = 0xfa, ///< Acknowledgement + RESEND = 0xfe, ///< Request to resend (not required to implement) + ECHO = 0xee ///< Echo answer +}; + +/*! \brief Commands for the PS/2 Controller + * + * These commands are processed by the controller and *not* send to + * keyboard/mouse. They have to be written into the command register. + */ +enum ControllerCommand { + CONTROLLER_GET_COMMAND_BYTE = 0x20, ///< Read Command Byte of PS/2 Controller + CONTROLLER_SET_COMMAND_BYTE = + 0x60, ///< Write Command Byte of PS/2 Controller + CONTROLLER_MOUSE_DISABLE = 0xa7, ///< Disable mouse interface + CONTROLLER_MOUSE_ENABLE = 0xa8, ///< Enable mouse interface + CONTROLLER_KEYBOARD_DISABLE = 0xad, ///< Disable keyboard interface + CONTROLLER_KEYBOARD_ENABLE = 0xae, ///< Enable keyboard interface + CONTROLLER_SEND_TO_MOUSE = 0xd4, ///< Send parameter to mouse device +}; + +/*! \brief Send a command or data to a connected PS/2 device + * + * The value must only be written into the input buffer after the previously + * written values have been fetched (\ref INPUT_PENDING in the status register). 
+ * + * \todo(11) Implement method + * + * \param value data to be sent + */ +[[maybe_unused]] static void sendData(uint8_t value) { + // TODO: You have to implement this method + (void)value; +} + +void init() { + // Switch all LEDs off (on many PCs NumLock is turned on after power up) + setLed(LED_CAPS_LOCK, false); + setLed(LED_SCROLL_LOCK, false); + setLed(LED_NUM_LOCK, false); + + // Set to maximum speed & minimum delay + setRepeatRate(SPEED_30_0CPS, DELAY_250MS); +} + +bool fetch(Key &pressed) { + // TODO: You have to implement this method + (void)pressed; + return false; +} + +void setRepeatRate(Speed speed, Delay delay) { + (void)speed; + (void)delay; +} + +void setLed(enum LED led, bool on) { + if (on) { + leds |= led; + } else { + leds &= ~led; + } + sendData(KEYBOARD_SET_LED); // Command for the Keyboard + sendData(leds); // Parameter +} + +void drainBuffer() {} + +} // namespace PS2Controller diff --git a/device/ps2controller.h b/device/ps2controller.h new file mode 100644 index 0000000..62a19f6 --- /dev/null +++ b/device/ps2controller.h @@ -0,0 +1,153 @@ +/*! \file + * \brief \ref PS2Controller "PS/2 Controller" (Intel 8042, also known as + * Keyboard Controller) + */ + +#pragma once +#include "../object/key.h" +#include "../types.h" + +/*! \brief PS/2 Controller + * \ingroup io + * + * Initializes the PS/2 devices (Keyboard and optional Mouse), and + * determines both the scan code and ASCII character of a pressed key from the + * transmitted make and break codes using the \ref KeyDecoder. + * + * \note This controller is also known as Intel 8042 (nowadays integrated in + * the mainboard) or *Keyboard Controller*. + * But to avoid confusion with the actual Keyboard and since we use the + * PS/2-compatible mode to support the Mouse as well, the name + * PS/2 Controller was chosen for the sake of simplicity. 
+ * + * \note Since modern PCs sometimes don't have a PS/2 connector, USB keyboards + * and mice are emulated as PS/2 devices with USB Legacy Support. + */ +namespace PS2Controller { +/*! \brief Initialization of connected devices + * + * All status LEDs of the keyboard are switched off and the repetition rate is + * set to maximum speed. + * + * Later the \ref IOAPIC is configured to receive corresponding interrupts. + * + * \note The keyboard interrupts should be configured as \ref IOAPIC::LEVEL + * "level triggered". According to the standard we would have to check the + * corresponding entry in + * \ref ACPI::MADS::Interrupt_Source_Override and use these values. Most + * likely this would suggest an \ref IOAPIC::EDGE "edge-triggered mode" -- which + * would work as well. However, using a \ref IOAPIC::LEVEL "level-triggered + * mode" is more forgiving because it resends the interrupt request even if an + * interrupt was lost (e.g. the required handling, retrieving the buffer entry, + * was not performed). + * + * \todo(12) Register with \ref IOAPIC + */ +void init(); + +/*! \brief Retrieve the keyboard event + * + * Retrieves make and break events from the keyboard. + * If a valid (non special) key was pressed, the scan code is determined + * using \ref KeyDecoder::decode into a \ref Key object. + * Events on special keys like \key{Shift}, \key{Alt}, \key{CapsLock} etc. are + * stored (in \ref KeyDecoder) and applied on subsequent keystrokes, while no + * valid key is retrieved. + * + * Mouse events are ignored. + * + * \todo(11) Implement Method + * + * \todo(12) Adjust method (unless it is already non-blocking) + * + * \param pressed Reference to an object which will contain the pressed \ref Key + * on success + * \return `true` if a valid key was decoded + */ +bool fetch(Key &pressed); + +/*!
\brief Delay before the keyboard starts repeating sending a pressed key + */ +enum Delay { + DELAY_250MS = 0, ///< Delay of 0.25s + DELAY_500MS = 1, ///< Delay of 0.5s + DELAY_750MS = 2, ///< Delay of 0.75s + DELAY_1000MS = 3 ///< Delay of 1s +}; + +/*! \brief Repeat Rate of Characters + * + * \see \ref ps2keyboard + */ +enum Speed { + SPEED_30_0CPS = 0x00, ///< 30 characters per second + SPEED_26_7CPS = 0x01, ///< 26.7 characters per second + SPEED_24_0CPS = 0x02, ///< 24 characters per second + SPEED_21_8CPS = 0x03, ///< 21.8 characters per second + SPEED_20_7CPS = 0x04, ///< 20.7 characters per second + SPEED_18_5CPS = 0x05, ///< 18.5 characters per second + SPEED_17_1CPS = 0x06, ///< 17.1 characters per second + SPEED_16_0CPS = 0x07, ///< 16 characters per second + SPEED_15_0CPS = 0x08, ///< 15 characters per second + SPEED_13_3CPS = 0x09, ///< 13.3 characters per second + SPEED_12_0CPS = 0x0a, ///< 12 characters per second + SPEED_10_9CPS = 0x0b, ///< 10.9 characters per second + SPEED_10_0CPS = 0x0c, ///< 10 characters per second + SPEED_09_2CPS = 0x0d, ///< 9.2 characters per second + SPEED_08_6CPS = 0x0e, ///< 8.6 characters per second + SPEED_08_0CPS = 0x0f, ///< 8 characters per second + SPEED_07_5CPS = 0x10, ///< 7.5 characters per second + SPEED_06_7CPS = 0x11, ///< 6.7 characters per second + SPEED_06_0CPS = 0x12, ///< 6 characters per second + SPEED_05_5CPS = 0x13, ///< 5.5 characters per second + SPEED_05_0CPS = 0x14, ///< 5 characters per second + SPEED_04_6CPS = 0x15, ///< 4.6 characters per second + SPEED_04_3CPS = 0x16, ///< 4.3 characters per second + SPEED_04_0CPS = 0x17, ///< 4 characters per second + SPEED_03_7CPS = 0x18, ///< 3.7 characters per second + SPEED_03_3CPS = 0x19, ///< 3.3 characters per second + SPEED_03_0CPS = 0x1a, ///< 3 characters per second + SPEED_02_7CPS = 0x1b, ///< 2.7 characters per second + SPEED_02_5CPS = 0x1c, ///< 2.5 characters per second + SPEED_02_3CPS = 0x1d, ///< 2.3 characters per second + SPEED_02_1CPS =
0x1e, ///< 2.1 characters per second + SPEED_02_0CPS = 0x1f, ///< 2 characters per second +}; + +/*! \brief Configure the repeat rate of the keyboard + * + * \param delay configures how long a key must be pressed before the repetition + * begins. + * \param speed determines how fast the key codes should follow each other. + * Valid values are between `0` (30 characters per second) and + * `31` (2 characters per second). + */ +void setRepeatRate(Speed speed, Delay delay); + +/*! \brief Keyboard LEDs + */ +enum LED { + LED_SCROLL_LOCK = 1 << 0, ///< Scroll Lock + LED_NUM_LOCK = 1 << 1, ///< Num Lock + LED_CAPS_LOCK = 1 << 2, ///< Caps Lock +}; + +/*! \brief Enable or disable a keyboard LED + * + * \param led LED to enable or disable + * \param on `true` will enable the specified LED, `false` disable + */ +void setLed(enum LED led, bool on); + +/*! \brief Empties the keyboard buffer. + * + * The keyboard may not send any interrupts if the buffer is not empty. + * To prevent unhandled keystrokes (for example during boot) the buffer + * should be emptied once right before allowing keyboard interrupts + * (even if keystrokes might be lost). 
+ * + * \todo(12) Implement method + */ +void drainBuffer(); + +} // namespace PS2Controller diff --git a/device/serialstream.cc b/device/serialstream.cc new file mode 100644 index 0000000..86a2585 --- /dev/null +++ b/device/serialstream.cc @@ -0,0 +1,30 @@ +#include "serialstream.h" + +SerialStream::SerialStream(ComPort port, BaudRate baud_rate, DataBits data_bits, + StopBits stop_bits, Parity parity) { + (void)port; + (void)baud_rate; + (void)data_bits; + (void)stop_bits; + (void)parity; +} + +void SerialStream::flush() {} + +void SerialStream::setForeground(Color c) { (void)c; } + +void SerialStream::setBackground(Color c) { (void)c; } + +void SerialStream::setAttribute(Attrib a) { (void)a; } + +void SerialStream::reset() {} + +void SerialStream::setPos(int x, int y) { + (void)x; + (void)y; +} + +void SerialStream::print(char* str, int length) { + (void)str; + (void)length; +} diff --git a/device/serialstream.h b/device/serialstream.h new file mode 100644 index 0000000..83450ee --- /dev/null +++ b/device/serialstream.h @@ -0,0 +1,143 @@ +/*! \file + * \brief \ref Serial \ref SerialStream "output stream" + */ + +#pragma once +#include "../arch/serial.h" +#include "../object/outputstream.h" +#include "../types.h" + +/*! \brief Console (VT100 compatible) via \ref Serial interface. + * \ingroup io + * + * This class allows to connect a VT100-compatible display terminal via + * the serial interface. + * + * The utility 'screen' can be used to attach a terminal to an interface + * at a specified connection speed: `screen /dev/ttyS0 115200` + * + * Color and position can be adjusted with the help of + * [escape + * codes](http://web.archive.org/web/20181008150037/http://www.termsys.demon.co.uk/vtansi.htm). + */ + +class SerialStream : public OutputStream, public Serial { + public: + /*! \brief Attributes + * can be used to influence the display of the output. 
+ * + * \note The attributes might not be supported or have a different effect + * depending on the terminal emulator! + */ + enum Attrib { + RESET = 0, ///< Turn off character attributes + BRIGHT = 1, ///< Bold + DIM = 2, ///< Low intensity (dimmed) + UNDERSCORE = 4, ///< Underline + BLINK = 5, ///< Blink (slow) + REVERSE = 7, ///< Swap fore & background + HIDDEN = 8, ///< Concealed + }; + + /*! \brief Color codes + * + * Default VT100 supports eight colors for both foreground and background + * (later versions 256 [8 bit] and even true color [32 bit]). + * The actual color is affected by the attributes and can look significantly + * different depending on the terminal emulator. + */ + enum Color { + BLACK = 0, + RED = 1, + GREEN = 2, + YELLOW = 3, + BLUE = 4, + MAGENTA = 5, + CYAN = 6, + WHITE = 7 + }; + + /*! \brief Constructor for the VT100-compatible console + * + * Sets up the serial connection as well + * + * \todo(11) Implement Method + */ + explicit SerialStream(ComPort port = COM1, BaudRate baud_rate = BAUD_115200, + DataBits data_bits = DATA_8BIT, + StopBits stop_bits = STOP_1BIT, + Parity parity = PARITY_NONE); + + /*! \brief Method to output the buffer contents of the base class \ref + * Stringbuffer + * + * The method is automatically called when the buffer is full, + * but can also be called explicitly to force output of the current buffer. + * + * \todo(11) Implement Method + */ + void flush() override; + + /*! \brief Change foreground color (for subsequent output) + * + * \todo(11) Implement Method + * + * \param c Color + */ + void setForeground(Color c); + + /*! \brief Change background color (for subsequent output) + * + * \todo(11) Implement Method + * + * \param c Color + */ + void setBackground(Color c); + + /*! \brief Change text attribute (for subsequent output) + * + * \todo(11) Implement Method + * + * \param a Attribute + */ + void setAttribute(Attrib a); + + /*! 
\brief Reset terminal + * + * Clear screen, place cursor at the beginning and reset colors + * and attributes to the default value. + * + * \todo(11) Implement Method + */ + void reset(); + + /*! \brief Set the cursor position + * + * \param x Column in window + * \param y Row in window + * + * \todo(11) Implement Method + */ + void setPos(int x, int y); + + /*! \brief Display multiple characters in the window starting at the current + * cursor position + * + * This method can be used to output a string, starting at the current cursor + * position. Since the string does not need to contain a '\0' termination + * (as it is usually the case in C), the parameter `length` is required to + * specify the number of characters in the string. + * + * The text is displayed using the previously configured + * \ref setAttribute() "attributes", \ref setForeground() "fore-" + * and \ref setBackground "background" color. + * + * A line break will occur wherever the character `\n` is inserted + * in the text to be output (for compatibility reasons a `\r` is + * automatically appended). + * + * \param str String to output + * \param length length of string + */ + void print(char* str, int length); +}; diff --git a/device/textstream.cc b/device/textstream.cc new file mode 100644 index 0000000..bc8579e --- /dev/null +++ b/device/textstream.cc @@ -0,0 +1,12 @@ +#include "textstream.h" + +TextStream::TextStream(unsigned from_col, unsigned to_col, unsigned from_row, + unsigned to_row, bool use_cursor) { + (void)from_col; + (void)to_col; + (void)from_row; + (void)to_row; + (void)use_cursor; +} + +void TextStream::flush() {} diff --git a/device/textstream.h b/device/textstream.h new file mode 100644 index 0000000..7cce382 --- /dev/null +++ b/device/textstream.h @@ -0,0 +1,41 @@ +/*! \file + * \brief \ref TextStream outputs text onto the screen in \ref CGA + */ + +/*! \defgroup io I/O subsystem + * \brief The input/output subsystem + */ + +#pragma once +#include "../types.h" + +/*! 
\brief Output text (from different data type sources) on screen in text + * mode + * \ingroup io + * + * Allows the output of different data types as strings on the \ref CGA + * screen of a PC. + * To achieve this, \ref TextStream is derived from both \ref OutputStream and + * \ref TextWindow and only implements the method \ref TextStream::flush(). + * Further formatting or special effects are implemented in \ref TextWindow. + */ +class TextStream { + // Prevent copies and assignments + TextStream(const TextStream&) = delete; + TextStream& operator=(const TextStream&) = delete; + + public: + /// \copydoc TextWindow::TextWindow(unsigned,unsigned,unsigned,unsigned,bool) + TextStream(unsigned from_col, unsigned to_col, unsigned from_row, + unsigned to_row, bool use_cursor = false); + + /*! \brief Output the buffer contents of the base class \ref Stringbuffer + * + * The method is automatically called when the buffer is full, + * but can also be called explicitly to force output of the current buffer. + * + * + * \todo(11) Implement method + */ + void flush(); +}; diff --git a/interrupt/epilogues.cc b/interrupt/epilogues.cc new file mode 100644 index 0000000..18bbd91 --- /dev/null +++ b/interrupt/epilogues.cc @@ -0,0 +1,12 @@ +#include "epilogues.h" + +#include "guard.h" + +namespace Epilogues { + +void keyboard(Vault& g) { (void)g; } + +void timer(Vault& g) { (void)g; } + +void assassin(Vault& g) { (void)g; } +}; // namespace Epilogues diff --git a/interrupt/epilogues.h b/interrupt/epilogues.h new file mode 100644 index 0000000..a366ebc --- /dev/null +++ b/interrupt/epilogues.h @@ -0,0 +1,50 @@ +/*! \file + * \brief \ref Epilogue handlers (keyboard, timer, assassin) executed by the \ref Guard + */ +#pragma once +#include "../types.h" + +struct Vault; + +/*! \brief A handler function for an epilogue. + * \ingroup interrupts + * + * It receives the vault directly, because it is executed on level 1/2 (by the + * \ref Guard).
+ * + * \note Since it does only receive one parameter, other data must be passed + * in a different way. + */ +using Epilogue = void (*)(Vault&); + +namespace Epilogues { + +/*! + * @brief The keyboard epilogue. + * + * Handle the keyboard Key that has been fetched during the prologue. + * + * \todo(13) print the stored character + * \todo(15) Store the key to the keyboard buffer for user threads. Wake user + * threads waiting for a key using the key semaphore. + * + * @param g + */ +void keyboard(Vault& g); + +/*! + * @brief Timer epilogue + * \todo(15) Preemptively reschedule threads + * \todo(16) Check the bellringer + * \todo(17) Refresh screen with fixed FPS rate + * @param g + */ +void timer(Vault& g); + +/*! \brief Examine the `dying flag` of the current thread and reschedule if + * it is set. + * + * \todo(15) Implement the rescheduling (in \MPStuBS only) + */ +void assassin(Vault& g); +}; // namespace Epilogues diff --git a/interrupt/guard.cc b/interrupt/guard.cc new file mode 100644 index 0000000..957975e --- /dev/null +++ b/interrupt/guard.cc @@ -0,0 +1,30 @@ +#include "guard.h" + +#include "../arch/core.h" +#include "../debug/output.h" +#include "../object/bbuffer.h" +#include "../sync/ticketlock.h" +#include "epilogues.h" + +#define FOR_CURRENT_CORE [Core::getID()] + +//! 
\brief The protected data for the epilogue level +static Vault global_vault; + +// lists of pending epilogues +static BBuffer epilogue_queue[Core::MAX] = {}; +// Big Kernel Lock (BKL) for the epilogue level +constinit Ticketlock global_lock; +constinit bool epi_flag[Core::MAX] = {false}; + +Vault::Vault() {} + +Guarded::~Guarded() { Guard::leave(); } + +Guarded Guard::enter() { while (true); } + +void Guard::leave() {} + +void Guard::relay(Epilogue handler) { (void)handler; } + +const Vault &Guard::unsafeConstAccess() { return global_vault; } diff --git a/interrupt/guard.h b/interrupt/guard.h new file mode 100644 index 0000000..a15647a --- /dev/null +++ b/interrupt/guard.h @@ -0,0 +1,121 @@ +/*! \file + * \brief \ref Guard synchronizes access to epilogue level + */ + +#pragma once +#include "../object/bbuffer.h" +#include "../object/key.h" +#include "../types.h" +#include "epilogues.h" + +//! \brief The epilogue vault contains the protected data for the epilogue level +struct Vault { + Vault(); + // no copy + Vault(const Vault&) = delete; + Vault& operator=(const Vault&) = delete; +}; + +/*! \brief Lock guard that provides access to the epilogue \ref Vault + * + * This object automatically unlocks the \ref Guard when it goes out of scope. + */ +class Guarded { + public: + //! This constructor should only be used by the \ref Guard + explicit Guarded(Vault& vault) : _vault(vault) {} + //! Leave the critical section + ~Guarded(); + + //! Access the epilogue vault + Vault& vault() { return _vault; } + const Vault& vault() const { return _vault; } + + // no copy + Guarded(const Guarded&) = delete; + Guarded& operator=(const Guarded&) = delete; + + private: + Vault& _vault; +}; + +/*! \brief Synchronizes the kernel with interrupts using the Prologue/Epilogue + * Model \ingroup interrupts + * + * The Guard is used to synchronize between "normal" core activities (currently + * just the text output, later system calls) and interrupt handling routines. 
+ * For this purpose, \ref Guard has to contain one or more \ref BBuffer + * "queues", in which \ref Epilogue functions can be added. This is necessary if + * the critical section is occupied at the time when an interrupt occurs, and + * the + * \ref Epilogue cannot be executed immediately. The queued epilogues are + * processed when leaving the critical section. + * + * **Hints:** + * - The epilogue queue is a central data structure, whose consistency + * must be ensured. The implementation provided by the \ref BBuffer is not + * entirely safe against concurrency. You need to disable + * interrupts during operations on the buffer. + * - In \MPStuBS, you need a separate epilogue queue for each core, + * in which each processor serializes *its* epilogues. However, epilogues + * on different cores could then be executed in parallel, since the + * critical section is managed separately on a per-core basis. This must be + * prevented by using a global \ref Ticketlock to avoid concurrent + * execution of epilogues -- there must never be more than one epilogue + * active on the whole system at the same time!
+ * *Please note:* This [giant lock](https://en.wikipedia.org/wiki/Giant_lock) + * (synchronizing all cores) should not be confused with the (core-specific) + * flag variable that marks only the entry to the epilogue level on the + * corresponding core! + * - Interrupts should be disabled for as short as possible. Due to this + * reason, the prologue/epilogue model allows epilogues to be interrupted + * by prologues. This means that interrupts should be + * \ref Core::Interrupt::enable "enabled" again before the epilogue is + * executed (this includes notifying the APIC about the + * \ref LAPIC::endOfInterrupt() "End-Of-Interrupt") + */ +namespace Guard { + +/*! \brief Entering the critical section from level 0. + * + * Entering the critical section has to be handled differently depending on + * the system: In a single-core system it is sufficient to mark the entry + * by just setting a flag variable (since only one control flow can enter + * the critical section at the same time). However, as soon as there are + * multiple cores, this is no longer the case. If a core wants to enter the + * critical section while *another* core is already in there, it should + * (actively) wait in this method until the critical area is released again. + * + * \todo(13) Implement Method + */ +Guarded enter(); + +/*! \brief Leaving the critical section. + * + * Leaves the critical section and processes all remaining (enqueued) epilogues. + * This may only be called while in level 1/2 after calling \ref enter(). + * + * Note: Usually, this method is called by the destructor of the \ref + * Guarded. + * + * \todo(13) Implement Method + */ +void leave(); + +/*! \brief A prologue wants its epilogue to be processed (entering from level + * 1). + * + * This method is called by the interrupt handlers. + * Whether this is done immediately or the epilogue just enqueued to the + * epilogue queue depends on whether the critical section on *this* Core is + * accessible or not. 
+ * + * \todo(13) Implement Method + */ +void relay(Epilogue handler); + +/*! \brief Access the epilogue vault without taking the lock. + * Beware race conditions! + */ +const Vault& unsafeConstAccess(); +} // namespace Guard diff --git a/interrupt/handlers.asm b/interrupt/handlers.asm new file mode 100644 index 0000000..1a2f059 --- /dev/null +++ b/interrupt/handlers.asm @@ -0,0 +1,14 @@ +[SECTION .text] +[EXTERN handle_keyboard] +[GLOBAL handle_keyboard_asm] + +; entry point for an interrupt to trigger a kernelpanic +; +align 16 +handle_keyboard_asm: + ; The interrupt may be triggered asynchronously, therefore the whole context + ; has to be saved and restored, or the interrupted code might not be able to + ; continue. The C++ compiler will only generates code to preserve + ; non-scratch registers in the high-level interrupt handler -- the scratch + ; registers have to be saved (and restored later) manually! + ; TODO(12): Implement the context save and restore for the keyboard interrupt diff --git a/interrupt/handlers.cc b/interrupt/handlers.cc new file mode 100644 index 0000000..2e33f81 --- /dev/null +++ b/interrupt/handlers.cc @@ -0,0 +1,102 @@ +#include "handlers.h" + +#include "../arch/core_cr.h" +#include "../arch/idt.h" +#include "../arch/lapic.h" +#include "../arch/system.h" +#include "../debug/kernelpanic.h" +#include "../debug/output.h" + +void printContext(const InterruptContext *context) { + DBG << "ip: " << hex << context->cs << ':' << context->ip + << " sp: " << context->ss << ':' << context->sp << " flags" << bin + << context->flags << endl; +} + +[[gnu::interrupt]] void handle_invalid_opcode(InterruptContext *context) { + DBG << "Invalid opcode encoutered" << endl; + printContext(context); + kernelpanic("Invalid opcode!"); +} + +[[gnu::interrupt]] void handle_double_fault(InterruptContext *context, + uint64_t error) { + (void)error; + DBG << "Double fault encoutered" << endl; + printContext(context); + kernelpanic("Double fault!"); +} + 
+[[gnu::interrupt]] void handle_invalid_tss(InterruptContext *context, + uint64_t error) { + DBG << "Invalid tss encoutered. Offending selector idx: " << dec << error + << endl; + printContext(context); + kernelpanic("Invalid TSS!"); +} + +[[gnu::interrupt]] void handle_general_protection_fault( + InterruptContext *context, uint64_t error) { + DBG << "General protection fault encoutered. Error code: " << dec << error + << endl; + printContext(context); + kernelpanic("General protection fault!"); +} + +enum PAGE_FAULT_ERROR { + PF_ERR_PRESENT = 0x1, + PF_ERR_WRITE = 0x2, + PF_ERR_USER = 0x4, + PF_ERR_RESERVED = 0x8, + PF_ERR_IFETCH = 0x10, +}; + +[[gnu::interrupt]] void handle_page_fault(InterruptContext *context, + uint64_t error) { + (void)error; + DBG << "Page fault encoutered at linear address " << hex + << Core::CR<2>::read() << endl + << (error & PF_ERR_PRESENT ? "present" : "non-present") << " page|" + << (error & PF_ERR_WRITE ? "write" : "read") << " access|" + << (error & PF_ERR_USER ? "user" : "supervisor") << "|" + << (error & PF_ERR_RESERVED ? "reserved bit int pte" : "") << "|" + << (error & PF_ERR_IFETCH ? 
"instrution" : "data") << " fetch|" << endl; + printContext(context); + kernelpanic("Page fault!"); +} + +void handle_keyboard() {} + +[[gnu::interrupt]] void handle_panic(InterruptContext *context) { + (void)context; +} + +[[gnu::interrupt]] void handle_timer(InterruptContext *context) { + (void)context; +} + +[[gnu::interrupt]] void handle_assassin(InterruptContext *context) { + (void)context; +} +[[gnu::interrupt]] void handle_wakeup(InterruptContext *context) { + (void)context; +} + +void initInterruptHandlers() { + // Some handlers that are useful for debugging + IDT::set(Core::Interrupt::Vector::INVALID_OPCODE, + IDT::InterruptDescriptor::Returning(handle_invalid_opcode)); + IDT::set(Core::Interrupt::Vector::DOUBLE_FAULT, + IDT::InterruptDescriptor::DivergingWithError(handle_double_fault)); + IDT::set(Core::Interrupt::Vector::INVALID_TSS, + IDT::InterruptDescriptor::ReturningWithError(handle_invalid_tss)); + IDT::set(Core::Interrupt::Vector::GENERAL_PROTECTION_FAULT, + IDT::InterruptDescriptor::ReturningWithError( + handle_general_protection_fault)); + IDT::set(Core::Interrupt::Vector::PAGE_FAULT, + IDT::InterruptDescriptor::ReturningWithError(handle_page_fault)); + + // TODO: Add more handlers here + // Load the idt pointer + IDT::load(); +} diff --git a/interrupt/handlers.h b/interrupt/handlers.h new file mode 100644 index 0000000..01cb9af --- /dev/null +++ b/interrupt/handlers.h @@ -0,0 +1,109 @@ +/*! \file All interrupts need to start somewhere. This file contains the entry + * points for all interrupts handled by StuBS. + * \brief The Interrupt Subsystem + * \defgroup interrupts Interrupt Handling + */ +#pragma once +#include "../types.h" + +/*! \brief Initialize the IDT. + * + * The interrupt subsystem of StubBS contains all functionality to accept + * interrupts from the hardware and process them. + * In later exercises the interrupts will enable applications to + * execute core functionality (system calls). 
+ * The entry point for the interrupt subsystem is the function + * 'interrupt_entry_VECTOR' (in `interrupt/handler.asm`). + * + * \todo(12) Register your own interrupt handlers + */ +void initInterruptHandlers(); + +struct InterruptContext; + +/*! + * @brief Helper function for printf-debugging the InterruptContext + */ +void printContext(const InterruptContext *context); + +/*! + * @brief An interrupt handler for the INVALID_OPCODE trap + */ +[[gnu::interrupt]] void handle_invalid_opcode(InterruptContext *context); +/*! + * @brief A double fault occurs when another exception occurs during exception + * handling. + * + * In this case, the OS cannot recover anymore. This can happen e.g. + * during page fault handling. + */ +[[gnu::interrupt]] void handle_double_fault(InterruptContext *context, + uint64_t error); +/*! + * @brief If the task state segment is configured incorrectly, the kernel cannot + * switch the privilege levels during interrupts. + */ +[[gnu::interrupt]] void handle_invalid_tss(InterruptContext *context, + uint64_t error); +/*! + * @brief When the CPU tried to execute an unprivileged opcode or exceeds + * segmentation bounds, the GPF exception is raised. + */ +[[gnu::interrupt]] void handle_general_protection_fault( + InterruptContext *context, uint64_t error); +/*! + * @brief With paging enabled, an invalid access to a memory page causes a page + * fault. + */ +[[gnu::interrupt]] void handle_page_fault(InterruptContext *context, + uint64_t error); + +extern "C" { // disable C++ name mangling for asm function + +/*! \brief Assembly interrupt handler for the keyboard. + * + * On keyboard interrupt, the register state is saved to and restored from the + * stack. This function wraps the handle_keyboard C-function. + * + * \todo(12) Implement in assembly + */ +[[gnu::interrupt]] void handle_keyboard_asm(InterruptContext *context); + +/*! \brief Higher-level Interrupt handler for the keyboard. 
+ * + * On keyboard interrupt, the PS2-Controller may contain a valid Key that has to + * be fetched. + * + * \todo(12) Fetch a single key + * \todo(13) Extend to use the Prologue-Epilogue pattern + */ +void handle_keyboard(); +} + +/*! \brief handle_panic + * + * \todo(12) Trigger a kernel panic + */ +[[gnu::interrupt]] void handle_panic(InterruptContext *context); + +/*! \brief handle_timer + * + * \todo(15) Handle the timer interrupt + */ +[[gnu::interrupt]] void handle_timer(InterruptContext *context); + +/*! \brief handle_assassin + * + * Handler for the assassin IPI, i.e. a thread shall be killed. + * + * \todo(15) Handle the assassin interrupt (in \MPStuBS only) + */ +[[gnu::interrupt]] void handle_assassin(InterruptContext *context); + +/*! \brief handle_wakeup + * + * In Multicore systems, an IPI is used to wake a sleeping core. + * + * \todo(16) Handle the wakeup interrupt (in \MPStuBS only) + */ +[[gnu::interrupt]] void handle_wakeup(InterruptContext *context); diff --git a/main.cc b/main.cc new file mode 100644 index 0000000..cba4c88 --- /dev/null +++ b/main.cc @@ -0,0 +1,28 @@ + +#include "arch/lapic.h" +#include "boot/startup_ap.h" +#include "debug/output.h" + +// Main function +// (the bootstrap processor starts here)} +extern "C" int main() { + unsigned int numCPUs = Core::count(); + DBG_VERBOSE << "Number of CPUs: " << numCPUs << endl; + + /* Start application processors + * To avoid unexpected behaviour, make sure that interrupts are not + * enabled before the APs are booted. 
Otherwise it might interfere with the + * Startup IPIs or even block devices like keyboard because of a missing EOI + */ + ApplicationProcessor::boot(); + + return 0; +} + +// Main function for application processors +extern "C" int main_ap() { + DBG_VERBOSE << "CPU core " << static_cast(Core::getID()) << " / LAPIC " + << static_cast(LAPIC::getID()) << " in main_ap()" << endl; + + return 0; +} diff --git a/nix-develop.sh b/nix-develop.sh new file mode 100644 index 0000000..5c398c7 --- /dev/null +++ b/nix-develop.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env sh +## This repo should not directly contain a flake.nix, to avoid it being automatically copied to the (locally) world-readable Nix store. +exec nix develop path:"$( cd "$(dirname "${BASH_SOURCE[0]}")" ; pwd -P )"/utils "$@" diff --git a/object/bbuffer.h b/object/bbuffer.h new file mode 100644 index 0000000..fdb57bf --- /dev/null +++ b/object/bbuffer.h @@ -0,0 +1,62 @@ +// vim: set noet ts=4 sw=4: + +/*! \file + * \brief Contains a \ref BBuffer "bounded buffer" + */ + +#pragma once +#include "../types.h" + +/*! \brief The class BBuffer implements a bounded buffer, that is a circular + * buffer with a fixed capacity. + * + * \tparam T the type of data to be stored + * \tparam CAP the buffers capacity (must be greater than 1) + */ +template +class BBuffer { + static_assert(CAP > 1, "BBuffer of size 1 is unsupported."); + // Prevent copies and assignments + BBuffer(const BBuffer&) = delete; + BBuffer& operator=(const BBuffer&) = delete; + + private: + T data[CAP]; + volatile unsigned in; + volatile unsigned out; + + public: + /*! \brief Constructor that initialized an empty buffer. + */ + BBuffer() : in(0), out(0) {} + + /*! \brief Add an element to the buffer. + * \param val The element to be added. + * \return `false` if the buffer is full and no element can be added; `true` + * otherwise. 
+ */ + bool produce(T val) { + unsigned nextin = (in + 1) % CAP; + if (nextin != out) { + data[in] = val; + in = nextin; + return true; + } + return false; + } + + /*! \brief Remove an element from the buffer. + * \param val Output parameter that receives the next element. If there is + * (currently) no next element, `val` will not be modified. + * \return `false` if the buffer was empty; `true` if the buffer was + * not empty and an element was written to val. + */ + bool consume(T& val) { + if (in != out) { + val = data[out]; + out = (out + 1) % CAP; + return true; + } + return false; + } +}; diff --git a/object/key.cc b/object/key.cc new file mode 100644 index 0000000..b444d99 --- /dev/null +++ b/object/key.cc @@ -0,0 +1,118 @@ +#include "key.h" + +// Character table for scan codes for US keyboards +static struct { + const unsigned char normal, // Character without modifiers + shift, // Character with pressed Shift, Capslock, or in Numpad + alt; // Character with pressed Alt key +} ascii_tab[Key::Scancode::KEYS] = { + {0, 0, 0}, // KEY_INVALID + {0, 0, 0}, // KEY_ESCAPE + {'1', '!', 0}, // KEY_1 + {'2', '"', 253}, // KEY_2 + {'3', 21, 0}, // KEY_3 + {'4', '$', 0}, // KEY_4 + {'5', '%', 0}, // KEY_5 + {'6', '&', 0}, // KEY_6 + {'7', '/', '{'}, // KEY_7 + {'8', '(', '['}, // KEY_8 + {'9', ')', ']'}, // KEY_9 + {'0', '=', '}'}, // KEY_0 + {225, '?', '\\'}, // KEY_DASH + {39, 96, 0}, // KEY_EQUAL + {'\b', 0, 0}, // KEY_BACKSPACE + {0, 0, 0}, // KEY_TAB + {'q', 'Q', '@'}, // KEY_Q + {'w', 'W', 0}, // KEY_W + {'e', 'E', 0}, // KEY_E + {'r', 'R', 0}, // KEY_R + {'t', 'T', 0}, // KEY_T + {'z', 'Z', 0}, // KEY_Y + {'u', 'U', 0}, // KEY_U + {'i', 'I', 0}, // KEY_I + {'o', 'O', 0}, // KEY_O + {'p', 'P', 0}, // KEY_P + {129, 154, 0}, // KEY_OPEN_BRACKET + {'+', '*', '~'}, // KEY_CLOSE_BRACKET + {'\n', 0, 0}, // KEY_ENTER + {0, 0, 0}, // KEY_LEFT_CTRL + {'a', 'A', 0}, // KEY_A + {'s', 'S', 0}, // KEY_S + {'d', 'D', 0}, // KEY_D + {'f', 'F', 0}, // KEY_F + {'g', 'G', 0}, // 
KEY_G + {'h', 'H', 0}, // KEY_H + {'j', 'J', 0}, // KEY_J + {'k', 'K', 0}, // KEY_K + {'l', 'L', 0}, // KEY_L + {148, 153, 0}, // KEY_SEMICOLON + {132, 142, 0}, // KEY_APOSTROPH + {'^', 248, 0}, // KEY_GRAVE_ACCENT + {0, 0, 0}, // KEY_LEFT_SHIFT + {'#', 39, 0}, // KEY_BACKSLASH + {'y', 'Y', 0}, // KEY_Z + {'x', 'X', 0}, // KEY_X + {'c', 'C', 0}, // KEY_C + {'v', 'V', 0}, // KEY_V + {'b', 'B', 0}, // KEY_B + {'n', 'N', 0}, // KEY_N + {'m', 'M', 230}, // KEY_M + {',', ';', 0}, // KEY_COMMA + {'.', ':', 0}, // KEY_PERIOD + {'-', '_', 0}, // KEY_SLASH + {0, 0, 0}, // KEY_RIGHT_SHIFT + {'*', '*', 0}, // KEY_KP_STAR + {0, 0, 0}, // KEY_LEFT_ALT + {' ', ' ', 0}, // KEY_SPACEBAR + {0, 0, 0}, // KEY_CAPS_LOCK + {0, 0, 0}, // KEY_F1 + {0, 0, 0}, // KEY_F2 + {0, 0, 0}, // KEY_F3 + {0, 0, 0}, // KEY_F4 + {0, 0, 0}, // KEY_F5 + {0, 0, 0}, // KEY_F6 + {0, 0, 0}, // KEY_F7 + {0, 0, 0}, // KEY_F8 + {0, 0, 0}, // KEY_F9 + {0, 0, 0}, // KEY_F10 + {0, 0, 0}, // KEY_NUM_LOCK + {0, 0, 0}, // KEY_SCROLL_LOCK + {0, '7', 0}, // KEY_KP_7 + {0, '8', 0}, // KEY_KP_8 + {0, '9', 0}, // KEY_KP_9 + {'-', '-', 0}, // KEY_KP_DASH + {0, '4', 0}, // KEY_KP_4 + {0, '5', 0}, // KEY_KP_5 + {0, '6', 0}, // KEY_KP_6 + {'+', '+', 0}, // KEY_KP_PLUS + {0, '1', 0}, // KEY_KP_1 + {0, '2', 0}, // KEY_KP_2 + {0, '3', 0}, // KEY_KP_3 + {0, '0', 0}, // KEY_KP_0 + {127, ',', 0}, // KEY_KP_PERIOD + {0, 0, 0}, // KEY_SYSREQ + {0, 0, 0}, // KEY_EUROPE_2 + {'<', '>', '|'}, // KEY_F11 + {0, 0, 0}, // KEY_F12 + {0, 0, 0}, // KEY_KP_EQUAL +}; + +unsigned char Key::ascii() const { + // Select the correct table depending on the modifier bits. + // For the sake of simplicity, Shift and NumLock have precedence over Alt. + // The Ctrl modifier does not have a distinct table. 
+ + if (!valid()) { + return '\0'; + } else if (shift || + (caps_lock && ((scancode >= KEY_Q && scancode <= KEY_P) || + (scancode >= KEY_A && scancode <= KEY_L) || + (scancode >= KEY_Z && scancode <= KEY_M))) || + (num_lock && scancode >= KEY_KP_7 && scancode <= KEY_KP_PERIOD)) { + return ascii_tab[scancode].shift; + } else if (alt()) { + return ascii_tab[scancode].alt; + } else { + return ascii_tab[scancode].normal; + } +} diff --git a/object/key.h b/object/key.h new file mode 100644 index 0000000..ce9cc06 --- /dev/null +++ b/object/key.h @@ -0,0 +1,165 @@ +/*! \file + * \brief \ref Key, an abstraction for handling pressed keys and their + * modifiers + */ + +#pragma once +#include "../types.h" + +/*! \brief Class that abstracts a key, made up of the scan code and the modifier + * bits. + */ +struct Key { + /*! \brief The keys' scan codes (code 1) + */ + enum Scancode : uint8_t { + // Invalid scan code + KEY_INVALID = 0, + + // "real" valid scan codes + KEY_ESCAPE, + KEY_1, + KEY_2, + KEY_3, + KEY_4, + KEY_5, + KEY_6, + KEY_7, + KEY_8, + KEY_9, + KEY_0, + KEY_DASH, + KEY_EQUAL, + KEY_BACKSPACE, + KEY_TAB, + KEY_Q, + KEY_W, + KEY_E, + KEY_R, + KEY_T, + KEY_Y, + KEY_U, + KEY_I, + KEY_O, + KEY_P, + KEY_OPEN_BRACKET, + KEY_CLOSE_BRACKET, + KEY_ENTER, + KEY_LEFT_CTRL, + KEY_A, + KEY_S, + KEY_D, + KEY_F, + KEY_G, + KEY_H, + KEY_J, + KEY_K, + KEY_L, + KEY_SEMICOLON, + KEY_APOSTROPH, + KEY_GRAVE_ACCENT, + KEY_LEFT_SHIFT, + KEY_BACKSLASH, + KEY_Z, + KEY_X, + KEY_C, + KEY_V, + KEY_B, + KEY_N, + KEY_M, + KEY_COMMA, + KEY_PERIOD, + KEY_SLASH, + KEY_RIGHT_SHIFT, + KEY_KP_STAR, + KEY_LEFT_ALT, + KEY_SPACEBAR, + KEY_CAPS_LOCK, + KEY_F1, + KEY_F2, + KEY_F3, + KEY_F4, + KEY_F5, + KEY_F6, + KEY_F7, + KEY_F8, + KEY_F9, + KEY_F10, + KEY_NUM_LOCK, + KEY_SCROLL_LOCK, + KEY_KP_7, + KEY_KP_8, + KEY_KP_9, + KEY_KP_DASH, + KEY_KP_4, + KEY_KP_5, + KEY_KP_6, + KEY_KP_PLUS, + KEY_KP_1, + KEY_KP_2, + KEY_KP_3, + KEY_KP_0, + KEY_KP_PERIOD, + KEY_SYSREQ, + KEY_EUROPE_2, + KEY_F11, + KEY_F12, + 
KEY_KP_EQUAL, + + // Number of keys (excluding aliases below) + KEYS, + + // aliases + KEY_DIV = KEY_7, + KEY_DEL = KEY_KP_PERIOD, + KEY_UP = KEY_KP_8, + KEY_DOWN = KEY_KP_2, + KEY_LEFT = KEY_KP_4, + KEY_RIGHT = KEY_KP_6, + }; + + Scancode scancode; + + // bit masks for the modifier keys + bool shift : 1, alt_left : 1, alt_right : 1, ctrl_left : 1, ctrl_right : 1, + caps_lock : 1, num_lock : 1, scroll_lock : 1; + + /*! \brief Default constructor: Instantiates an invalid key by setting ASCII, + * scan code, and modifier bits to 0 + */ + Key() + : scancode(KEY_INVALID), + shift(false), + alt_left(false), + alt_right(false), + ctrl_left(false), + ctrl_right(false), + caps_lock(false), + num_lock(false), + scroll_lock(false) {} + + /*! \brief Invalid keys have a scancode = 0 + * \return Checks whether a key is valid. + */ + bool valid() const { return scancode != KEY_INVALID && scancode < KEYS; } + + /*! \brief Marks the key as invalid by setting the scan code to 0. + * + */ + void invalidate() { scancode = KEY_INVALID; } + + /*! \brief Get the key's ASCII value + * \return the key's ASCII value + */ + unsigned char ascii() const; + + /*! \brief Indicates whether the ALT modifier is set + * \return `true` if ALT key was pressed during key press + */ + bool alt() const { return alt_left || alt_right; } + + /*! 
\brief Indicates whether the CTRL modifier is set + * \return `true` if CTRL key was pressed during key press + */ + bool ctrl() const { return ctrl_left || ctrl_right; } +}; diff --git a/object/outputstream.cc b/object/outputstream.cc new file mode 100644 index 0000000..7a61033 --- /dev/null +++ b/object/outputstream.cc @@ -0,0 +1,181 @@ +#include "outputstream.h" + +// operator <<: Converts the value in given data type to a string + +// Print a single character (trivial) +OutputStream& OutputStream::operator<<(char c) { + put(c); + return *this; +} + +OutputStream& OutputStream::operator<<(unsigned char c) { + return *this << static_cast(c); +} + +// Printing a null-terminated string +OutputStream& OutputStream::operator<<(const char* string) { + while ((*string) != '\0') { + put(*string); + string++; + } + return *this; +} + +OutputStream& OutputStream::operator<<(bool b) { + return *this << (b ? "true" : "false"); +} + +// Print integral numbers in number system base. +// All signed types are promoted to long long, +// all unsigned types to unsigned long long. + +OutputStream& OutputStream::operator<<(short ival) { + return *this << static_cast(ival); +} + +OutputStream& OutputStream::operator<<(unsigned short ival) { + return *this << static_cast(ival); +} + +OutputStream& OutputStream::operator<<(int ival) { + return *this << static_cast(ival); +} + +OutputStream& OutputStream::operator<<(unsigned int ival) { + return *this << static_cast(ival); +} + +OutputStream& OutputStream::operator<<(long ival) { + return *this << static_cast(ival); +} + +OutputStream& OutputStream::operator<<(unsigned long ival) { + return *this << static_cast(ival); +} + +// Print a signed , integral number. +OutputStream& OutputStream::operator<<(long long ival) { + /* Print '-' if number is negative + * + * In case ival is equal to LONG_LONG_MIN (0x8000000000000000), this + * multiplication with -1 will overflow and, as for all signed overflows, + * is not specified in C/C++. 
Thus, this operation will only work when + * the system uses two's complement: + * ~(0x8000000000000000) + 1 = 0x7fffffffffffffff + 1 = 0x8000000000000000 + * + * When casting 0x8000000000000000 to unsigned long long, the value will + * be (correctly) interpreted as -(LONG_LONG_MIN). + * + * A solution conforming (more) to the standard could be: + * if ((ival < 0) && (base == 10)) { + * put('-'); + * if (ival == LONG_LONG_MIN) { + * return *this << static_cast(LONG_LONG_MAX - + * (LONG_LONG_MAX + LONG_LONG_MIN)); } else { return *this << + * static_cast(-ival); + * } + * (However it introduces additional overhead) + */ + if ((ival < 0) && (base == 10)) { + put('-'); + ival = -ival; + } + // Print the remaining positive number using the unsigned output + return *this << static_cast(ival); +} + +// Print a unsigned, integral number. +OutputStream& OutputStream::operator<<(unsigned long long ival) { + if (base == 0) { + base = 16; + } + + if (base == 2) { + put('0'); + put('b'); + } else if (base == 8) { + put('0'); // octal numbers are prefixed with 0 + } else if (base == 16) { + put('0'); // hexadecimal numbers are prefixed with 0x + put('x'); + } + + // Determine the largest potency in the number system used, which is + // still smaller than the number to be printed + unsigned long long div; + for (div = 1; ival / div >= static_cast(base); + div *= base) { + } + + // print number char by char + for (; div > 0; div /= static_cast(base)) { + auto digit = ival / div; + if (digit < 10) { + put(static_cast('0' + digit)); + } else { + put(static_cast('a' + digit - 10)); + } + + ival %= div; + } + return *this; +} + +// Print a pointer as hexadecimal number +OutputStream& OutputStream::operator<<(const void* ptr) { + int oldbase = base; + base = 16; + *this << reinterpret_cast(ptr); + base = oldbase; + return *this; +} + +// Calls one of the manipulator functions +OutputStream& OutputStream::operator<<(OutputStream& (*f)(OutputStream&)) { + return f(*this); +} + +/* 
STREAM MANIPULATORS + * + * The functions below take and return a reference to an OutputStream object + * and are called by OutputStream& operator << (OutputStream& (*f) + * (OutputStream&)); The purpose of these manipulator functions is modifying + * the behavior of the stream they are executed on, such as changing the number + * system. + */ + +// flush: Explicit buffer flush +OutputStream& flush(OutputStream& os) { + os.flush(); + return os; +} + +// endl: Inserts a newline to the output +OutputStream& endl(OutputStream& os) { + os << '\n' << flush; + return os; +} + +// bin: Selects the binary number system +OutputStream& bin(OutputStream& os) { + os.base = 2; + return os; +} + +// oct: Selects the octal number system +OutputStream& oct(OutputStream& os) { + os.base = 8; + return os; +} + +// dec: Selects the decimal number system +OutputStream& dec(OutputStream& os) { + os.base = 10; + return os; +} + +// hex: Selects the hexadecimal number system +OutputStream& hex(OutputStream& os) { + os.base = 16; + return os; +} diff --git a/object/outputstream.h b/object/outputstream.h new file mode 100644 index 0000000..baac747 --- /dev/null +++ b/object/outputstream.h @@ -0,0 +1,209 @@ +/*! \file + * \brief This file contains the \ref OutputStream + * + * Along with the class OutputStream itself, this file contains definitions for + * the manipulators \ref hex, \ref dec, \ref oct, and \ref bin, which are used + * for changing the radix, and \ref endl for signaling the end of the current + * line. + * \ingroup io + * + * \par Manipulators + * To simplify formatting text and numbers using the class OutputStream, we + * define so-called manipulators. For example, the expression kout << "a = " + * << dec << a << " is hexadecimal " << hex << a << endl; should, at first, + * print the value stored in decimal and then in hexadecimal form, followed by a + * line break.
The intended properties can be realized by implementing \ref hex, + * \ref dec, \ref oct, \ref bin, and \ref endl as functions (i.e., they are, in + * particular, not methods of \ref OutputStream) that take (as first parameter) + * and return a reference to an OutputStream object. When compiling the + * expression shown above, the method OutputStream& OutputStream::operator<< + * (OutputStream& (*f)(OutputStream&)) is chosen when one of the functions \ref hex, + * \ref dec, \ref oct, \ref bin, or \ref endl is streamed into an \ref + * OutputStream, which finally will execute the passed function. + * + * \note The term manipulator originates from the book + * [The C++ Programming Language](http://www.stroustrup.com/4th.html) + * by Bjarne Stroustrup. Refer to this book for further explanations. + */ + +#pragma once +#include "../types.h" +#include "./stringbuffer.h" + +/*! \brief The class OutputStream corresponds, essentially, to the class ostream + * from the C++ IO-Stream library. + * + * As relying on the method \ref Stringbuffer::put() is quite cumbersome when + * not only printing single characters, but numbers and whole strings, the + * class OutputStream provides a convenient way of composing output of + * variables of varying data types. Therefore, OutputStream implements shift + * operators `operator<<` for various data types (similar to those known from + * the C++ IO-Stream library). + * + * For further convenience, OutputStream also allows printing integral numbers + * in decimal, binary, octal, and hexadecimal format. Remember that, for + * negative numbers, the sign is only printed when using the decimal number + * system; for binary, octal, and hex, the number is printed as stored in the + * machine word without interpreting the sign. For Intel CPUs, two's complement + * is used for storing negative values; `-1` (promoted to 64 bit), for example, + * prints hex `0xffffffffffffffff` and octal `01777777777777777777777`.
+ * + * OutputStream's public methods/operators all return a reference to the object + * they are called on (i.e. `*this`). Returning `*this` allows chaining those + * stream operators in a single expression, such as + * kout << "a = " << a; + * + * At this point in time, OutputStream implements `operator<<`` for chars, + * strings and whole numbers. An additional `operator<<` allows using + * manipulators whose detailed description is given below. + */ + +class OutputStream : public Stringbuffer { + OutputStream(const OutputStream&) = delete; + OutputStream& operator=(const OutputStream&) = delete; + + public: + /*! \brief Number system used for printing integral numbers (one of 2, + * 8, 10, or 16) + */ + int base; + + /*! \brief Default constructor. Initial number system is decimal. + * + */ + OutputStream() : base(10) {} + + /*! \brief Destructor + */ + virtual ~OutputStream() {} + + /*! \brief Clears the buffer. + * + * Pure virtual method that must be implemented by derived + * (non-abstract) classes. + * Formatting of the buffer contents can be implemented differently by + * different derived classes + */ + virtual void flush() = 0; + + /*! \brief Print a single character + * + * \param c Character to be printed + * \return Reference to OutputStream os; allows operator chaining. + */ + OutputStream& operator<<(char c); + + /*! \brief Print a single character + * \note In C, there are no "characters" in that sense, but only + * integers. A `char`, therefore, is a 8 bit number with the most + * significant bit (optionally) representing a sign. + * Depending on whether signed or not, the value ranges are [-128, 127] + * or [0; 255]. For GCC, a `char` is a `signed char`. + * + * \param c Character to be printed + * \return Reference to OutputStream os; allows operator chaining. + */ + OutputStream& operator<<(unsigned char c); + + /*! 
\brief Printing a null-terminated string + * + * \param string String to be printed + * \return Reference to OutputStream os; allows operator chaining. + */ + OutputStream& operator<<(const char* string); + + /*! \brief Print a boolean value + * + * \param b Boolean to be printed + * \return Reference to OutputStream os; allows operator chaining. + */ + OutputStream& operator<<(bool b); + + /*! \brief Print an integral number in radix base + * + * \param ival Number to be printed + * \return Reference to OutputStream os; allows operator chaining. + */ + OutputStream& operator<<(short ival); + + /// \copydoc OutputStream::operator<<(short) + OutputStream& operator<<(unsigned short ival); + + /// \copydoc OutputStream::operator<<(short) + OutputStream& operator<<(int ival); + + /// \copydoc OutputStream::operator<<(short) + OutputStream& operator<<(unsigned int ival); + + /// \copydoc OutputStream::operator<<(short) + OutputStream& operator<<(long ival); + + /// \copydoc OutputStream::operator<<(short) + OutputStream& operator<<(unsigned long ival); + + /// \copydoc OutputStream::operator<<(short) + OutputStream& operator<<(long long ival); + + /// \copydoc OutputStream::operator<<(short) + OutputStream& operator<<(unsigned long long ival); + + /*! \brief Print a pointer as hexadecimal number + * + * \param ptr Pointer to be printed + * \return Reference to OutputStream os; allows operator chaining. + */ + OutputStream& operator<<(const void* ptr); + + /*! \brief Calls one of the manipulator functions. + * + * Method that calls the manipulator functions defined below, which + * allow modifying the stream's behavior by, for instance, changing the + * number system. + * + * \param f Manipulator function to be called + * \return Reference to OutputStream os; allows operator chaining. + */ + OutputStream& operator<<(OutputStream& (*f)(OutputStream&)); +}; + +/*! \brief Enforces a buffer flush. + * + * \param os Reference to stream to be flushed. 
+ * \return Reference to OutputStream os; allows operator chaining. + */ +OutputStream& flush(OutputStream& os); + +/*! \brief Prints a newline character to the stream and issues a buffer flush. + * + * \param os Reference to stream to be modified. + * \return Reference to OutputStream os; allows operator chaining. + */ +OutputStream& endl(OutputStream& os); + +/*! \brief Print subsequent numbers in binary form. + * + * \param os Reference to stream to be modified. + * \return Reference to OutputStream os; allows operator chaining. + */ +OutputStream& bin(OutputStream& os); + +/*! \brief Print subsequent numbers in octal form. + * + * \param os Reference to stream to be modified. + * \return Reference to OutputStream os; allows operator chaining. + */ +OutputStream& oct(OutputStream& os); + +/*! \brief Print subsequent numbers in decimal form. + * + * \param os Reference to stream to be modified. + * \return Reference to OutputStream os; allows operator chaining. + */ +OutputStream& dec(OutputStream& os); + +/*! \brief Print subsequent numbers in hex form. + * + * \param os Reference to stream to be modified. + * \return Reference to OutputStream os; allows operator chaining. + */ +OutputStream& hex(OutputStream& os); diff --git a/object/queue.h b/object/queue.h new file mode 100644 index 0000000..8b60fbf --- /dev/null +++ b/object/queue.h @@ -0,0 +1,293 @@ +/*! \file + * \brief Templated \ref Queue for arbitrary objects. + */ + +#pragma once +#include "../arch/core.h" +#include "../debug/assert.h" +#include "../object/outputstream.h" +#include "../types.h" + +/*! \brief Templated Queue for arbitrary objects. + * + * Queue is implemented by a head-object (Queue) and next-pointers embedded + * in the queued objects. This Queue supports arrays of next-pointers by passing + * an index into the constructor identifying the index into the next-pointer + * array. 
By passing a different get_link function into the constructor, the + * member name of the next-pointer array can be changed and objects can be + * contained in different independent queues. + */ +template +class Queue { + /*! \brief Default get_link implementation returns a pointer to the + * link_index'th element of the next-pointer array. + * The function assumes a member named "queue_link" that stores the + * next-pointer. + * + * If your object contains a queue_link member you can just ignore this + * function and the get_link keyword argument of the constructor. + * + * \param[in] obj the object whose link should be accessed. + * \param[in] link_index the index within the array. + * + * \return A pointer to the next-object pointer. + */ + static T** default_get_link(T& obj, unsigned link_index) { + assert(link_index < sizeof(T::queue_link) / sizeof(void*)); + return &obj.queue_link[link_index]; + } + /// Type definition for the get_link function + typedef T** (*NextFunc)(T&, unsigned); + + /// Queue-local index into the next-pointer array + unsigned link_index; + + /// Provides the same signature for single- and multi-core Queue + T** get_link_wrapped(T& obj) { return get_link(obj, link_index); } + + /// Function pointer to the get_link function, called whenever the + /// next pointer array is referenced + const NextFunc get_link; + /// head points to the first element (the one returned on first dequeue). + /// Can be nullptr if the queue is empty. + T* head; + /// tail points to the last element (the one last added). + /// Is only valid if head != nullptr + T* tail; + + // Prevent copies and assignments + Queue(const Queue&) = delete; + Queue& operator=(const Queue&) = delete; + + public: + /*! \brief Minimal forward iterator + * You can use this iterator to iterate the queue like a normal STL container. + * It only supports forward iteration, since the queue is single linked. 
+ */ + class Iterator { + private: + Queue& queue; + T* current; + friend class Queue; + Iterator(Queue& queue, T* current) : queue(queue), current(current) {} + + public: + Iterator operator+(unsigned num) { // copy advanced by up to num elements, clamped to end() + if (current == nullptr) { + return *this; + } + T* temp = current; + while (temp != nullptr && num--) { // never pass a null element to next() + temp = queue.next(*temp); + } + return Iterator(queue, temp); + } + + // pre increment + Iterator& operator++() { + current = queue.next(*current); + return *this; + } + + // post increment + Iterator operator++(int) { + auto temp = Iterator(queue, current); + current = queue.next(*current); + return temp; + } + + T* operator*() { return current; } + + bool operator==(const Iterator& other) { return current == other.current; } + + bool operator!=(const Iterator& other) { return !(*this == other); } + }; + + constexpr Queue(Queue&&) = default; + + /*! \brief Constructor + * \param[in] link_index denotes the index into the next-pointer array + * to be used by this + *queue-object + * \param[in] get_link A function pointer to the get_link, i.e. a function + * which returns a pointer to the + *next-pointer of an element in the Queue. + */ + explicit Queue(unsigned link_index, NextFunc get_link = default_get_link) + : link_index(link_index), + get_link(get_link), + head(nullptr), + tail(nullptr) {} + + /*! \brief Enqueues the provided item at the end of the queue. If the element + * is already contained in the queue, false will be returned + * \param[in] item element to be appended (enqueued). + * \return true if the item was appended, false if it was already enqueued + * (in which case nothing is done) + */ + bool enqueue(T& item) { + T** nextptr = get_link_wrapped(item); + if (*nextptr != nullptr || (head != nullptr && tail == &item)) { + return false; + } + *nextptr = nullptr; + + if (head == nullptr) { + head = tail = &item; + } else { + assert(tail != nullptr); + *get_link_wrapped(*tail) = &item; + tail = &item; + } + return true; + } + + /*!
\brief insert a new element at the start of the queue + * \param[in] item the new item to add + * \return true if successful, false if item was already in the queue + **/ + bool insertFirst(T& item) { + T** nextptr = get_link_wrapped(item); + if (*nextptr != nullptr || (head != nullptr && tail == &item)) { + return false; + } + + if (head == nullptr) { + tail = &item; + } + *nextptr = head; + head = &item; + return true; + } + + /*! \brief Insert a new element item into the list after an element after. + * Returns false if item is already in the/a list or after is not in this + *list + * \param[in] after the element after which the new one should be inserted + * \param[in] item the new element to add + * \return true if successful, false if item was in the list or after was not + **/ + bool insertAfter(T& after, T& item) { + // if queue is empty there is no after + // and tail is not valid so we need to check head here + if (head == nullptr) { + return false; + } + + if (&after == tail) { + return enqueue(item); + } + T** nextptr = get_link_wrapped(item); + // if item is already in the list return false + if (*nextptr != nullptr || tail == &item) { + return false; + } + + T** pnextptr = get_link_wrapped(after); + // if after is NOT in the list (empty link and not the tail), return false + if (!(*pnextptr != nullptr || tail == &after)) { + return false; + } + + *nextptr = *pnextptr; + *pnextptr = &item; + return true; + } + + /*! \brief return the next element of a given one or nullptr if the end is + *reached + * \param[in] item the current item + * \return the next element or nullptr if the end is reached or the item is + *not in this list + **/ + T* next(T& item) { + T** nextptr = get_link_wrapped(item); + // if the queue is empty or item is not in the list return nullptr + if (head == nullptr || (*nextptr == nullptr && tail != &item)) { + return nullptr; + } + + return *nextptr; + } + + /*! \brief Return whether or not the queue is empty + * \return True if the queue is empty or false otherwise.
+ */ + bool is_empty() const { return (head == nullptr); } + + /*! \brief Removes the first element in the queue and returns it. + * \note Does not update the tail-pointer + * \return Pointer to the removed item or `nullptr` if the queue was empty. + */ + T* dequeue() { + T* out = head; + if (head != nullptr) { + T** nextptr = get_link_wrapped(*head); + head = *nextptr; + *nextptr = nullptr; + } + return out; + } + + /*! \brief Removes a given element from the queue and returns that element, + * or nullptr if it was not present + * \return pointer to the removed element, or nullptr if not present + */ + T* remove(T* that) { + if (!that) return nullptr; + T* cur = head; + T** next_link; + + if (head == that) { + head = *get_link_wrapped(*head); + + *get_link_wrapped(*that) = nullptr; + return that; + } + while (cur) { + next_link = get_link_wrapped(*cur); + if (*next_link == that) { + *next_link = *get_link_wrapped(**next_link); + + if (that == tail) { + tail = cur; + } + + *get_link_wrapped(*that) = nullptr; + return that; + } + cur = *next_link; + } + return nullptr; + } + + /// get an iterator to the first element + Queue::Iterator begin() { return Queue::Iterator(*this, head); } + + /// get an iterator that marks the end of list + Queue::Iterator end() { return Queue::Iterator(*this, nullptr); } + + /// get the first element of the queue + T* first() { return head; } + + /// get the last element of the queue + T* last() { return (head == nullptr ? nullptr : tail); } +}; + +/*! \brief Overload stream operator for list printing. + * + * With this a list can be printed. The elements itself are not printed, just + * the pointer. 
+ */ +template +OutputStream& operator<<(OutputStream& os, Queue& queue) { + os << "{"; + for (typename Queue::Iterator it = queue.begin(); it != queue.end(); + ++it) { + os << *it; + if (it + 1 != queue.end()) { + os << ", "; + } + } + return os << "}"; +} diff --git a/object/stringbuffer.cc b/object/stringbuffer.cc new file mode 100644 index 0000000..c0f0903 --- /dev/null +++ b/object/stringbuffer.cc @@ -0,0 +1,3 @@ +#include "stringbuffer.h" + +void Stringbuffer::put(char c) { (void)c; } diff --git a/object/stringbuffer.h b/object/stringbuffer.h new file mode 100644 index 0000000..80bbf5f --- /dev/null +++ b/object/stringbuffer.h @@ -0,0 +1,71 @@ +/*! \file + * \brief \ref Stringbuffer composes single characters into a buffer + */ + +#pragma once +#include "../types.h" + +/*! \brief The class Stringbuffer composes single characters into a longer text + * that can be processed on block. + * + * To make Stringbuffer as versatile as possible, the class does make + * assumptions about neither the underlying hardware, nor the meaning of + * "processing". When flush() is called (i.e., either on explicit request or + * once the buffer is full). To be hardware independent, flush() is to be + * implemented by the derived classes. + * + * \par Hints for Implementation + * Use a buffer of fixed size for caching characters, which should be + * accessible by derived classes. + * Keep in mind that the derived implementation of flush() will need to know + * about numbers of characters in the buffer. + * + * \par Notes + * Reason for the existence of this class is that generating longer texts is + * often implemented by assembly of small fragments (such as single characters + * or numbers). + * However, writing such small fragments directly to (for example) screen is + * quite inefficient (e.g., due to the use of IO ports, syscalls, or locks) and + * can be improved drastically by delaying the output step until the assembly + * is finished (or the buffer runs full). 
+ */ +class Stringbuffer { + // Prevent copies and assignments + Stringbuffer(const Stringbuffer&) = delete; + Stringbuffer& operator=(const Stringbuffer&) = delete; + + // All variables and methods are protected in this class, + // as the derived classes need direct access to the buffer, + // the constructor, the destructor, and the method put. + // flush() is to be implemented either way and may be redefined + // as public. + + protected: + /*! \brief Constructor; Marks the buffer as empty + */ + Stringbuffer() {} + + /*! \brief Inserts a character into the buffer. + * + * Once the buffer is full, a call to flush() will be issued, + * thereby clearing the buffer. + * + * \param c Char to be added + * + * \todo(11) Implement + */ + void put(char c); + + /*! \brief Flush the buffer contents + * + * This method is to be defined in derived classes, as only those know + * how to print characters. + * flush() is required to reset the position pos. + */ + virtual void flush() = 0; + + public: + /*! \brief Destructor (nothing to do here) + */ + virtual ~Stringbuffer() {} +}; diff --git a/sync/bellringer.cc b/sync/bellringer.cc new file mode 100644 index 0000000..3413b38 --- /dev/null +++ b/sync/bellringer.cc @@ -0,0 +1,29 @@ +#include "bellringer.h" + +#include "../interrupt/guard.h" +#include "../thread/thread.h" + +struct Bell { + // link pointer to the next bell in the bellringer's bell list + Bell *queue_link[1] = {nullptr}; + + Thread *thread; + size_t counter; +}; + +Bell **Bellringer::bell_link(Bell &obj, unsigned link_index) { + return &obj.queue_link[link_index]; +} + +// check: Checks whether bells are running out of time and rings them if +// necessary +void Bellringer::check(Vault &vault) { (void)vault; } + +// sleep: Give a bell to the bellringer & ring it when the specified time ran out. +void Bellringer::sleep(Vault &vault, unsigned int ms) { + (void)vault; + (void)ms; +} + +// Are there bells in the queue?
+bool Bellringer::bellPending() const { return false; } diff --git a/sync/bellringer.h b/sync/bellringer.h new file mode 100644 index 0000000..949f2c1 --- /dev/null +++ b/sync/bellringer.h @@ -0,0 +1,69 @@ +/*! \file + * \brief \ref Bellringer that manages and activates time-triggered activities. + */ + +#pragma once +#include "../object/queue.h" +#include "../types.h" + +struct Vault; +struct Bell; + +/*! \brief Manages and activates time-triggered activities. + * \ingroup ipc + * + * The Bellringer is regularly activated and checks whether any of the bells + * should ring. The bells are stored in a Queue that is managed by the + * Bellringer. A clever implementation avoids iterating through the whole list + * for every iteration by keeping the bells sorted and storing delta times. This + * approach leads to a complexity of O(1) for the method called by the timer + * interrupt in case no bells need to be rung. + */ +class Bellringer { + // Prevent copies and assignments + Bellringer(const Bellringer&) = delete; + Bellringer& operator=(const Bellringer&) = delete; + + /*! \brief List of bells currently managed. + * + * This list contains non-expired bells enqueued by job(). + * These bells will be checked on every call to check(). + * + * All elements that should be inserted into a Queue instance + * are required to be derived from Queue::Node. + */ + Queue bells; + + //! Link pointer for bells + static Bell** bell_link(Bell& obj, unsigned link_index); + + public: + // constructor + Bellringer() : bells(0, bell_link) {} + + /*! \brief Checks whether there are bells to be rung. + * + * Every call to check elapses a tick. Once such a tick reduces a bells + * remaining time to zero, the bell will be rung. + * + * \todo(16) Implement Method + */ + void check(Vault& vault); + + /*! \brief Passes a `bell` to the bellringer to be rung after `ms` + * milliseconds. 
+ * \param bell Bell that should be rung after `ms` milliseconds + * \param ms number of milliseconds that should be waited before + * ringing the bell + * + * \todo(16) Implement Method + */ + void sleep(Vault& vault, unsigned int ms); + + /*! \brief Checks whether there are enqueued bells. + * \return true if there are enqueued bells, false otherwise + * + * \todo(16) Implement Method + */ + bool bellPending() const; +}; diff --git a/sync/semaphore.cc b/sync/semaphore.cc new file mode 100644 index 0000000..1b6e059 --- /dev/null +++ b/sync/semaphore.cc @@ -0,0 +1,16 @@ +#include "./semaphore.h" + +#include "../interrupt/guard.h" +#include "../thread/thread.h" + +Semaphore::Semaphore(unsigned c) { (void)c; } + +Thread **Semaphore::thread_link(Thread &obj, unsigned link_index) { + (void)obj; + (void)link_index; + return nullptr; +} + +void Semaphore::p(Vault &vault) { (void)vault; } + +void Semaphore::v(Vault &vault) { (void)vault; } diff --git a/sync/semaphore.h b/sync/semaphore.h new file mode 100644 index 0000000..35edce9 --- /dev/null +++ b/sync/semaphore.h @@ -0,0 +1,57 @@ +#pragma once +#include "../object/queue.h" +#include "../types.h" + +/*! \file + * \brief \ref Semaphore for synchronization of threads. + */ + +/*! + * \defgroup ipc Inter-Process Communication + * \brief Communication between threads + */ + +// Forward declarations to break cyclic includes +struct Vault; +class Thread; + +/*! \brief Semaphore used for synchronization of threads. + * \ingroup ipc + * + * The class Semaphore implements the concept of counting semaphores. + * The waiting list is provided by the base class Waitingroom. + */ +class Semaphore { + // Prevent copies and assignments + Semaphore(const Semaphore&) = delete; + Semaphore& operator=(const Semaphore&) = delete; + + static Thread** thread_link(Thread& obj, unsigned link_index); + + public: + /*! 
\brief Constructor; initialized the counter with provided value `c` + * \param c Initial counter value + * + * \todo(16) Implement Constructor + */ + explicit Semaphore(unsigned c = 0); + + /*! \brief Wait for access to the critical area. + * + * Enter/Wait operation: If the counter is greater than 0, then it is + * decremented by one. Otherwise the calling thread will be enqueued + * into the Waitingroom and marked as blocked. + * + * \todo(16) Implement Method + */ + void p(Vault& vault); + + /*! \brief Leave the critical area. + * + * Leave operation: If there are threads in the Waitingroom, wake the + * first one; otherwise increment the counter by one. + * + * \todo(16) Implement Method + */ + void v(Vault& vault); +}; diff --git a/sync/spinlock.h b/sync/spinlock.h new file mode 100644 index 0000000..ba73e41 --- /dev/null +++ b/sync/spinlock.h @@ -0,0 +1,60 @@ +/*! \file + * \brief Contains the class Spinlock + */ + +#pragma once +#include "../types.h" + +/*! \brief Using Spinlocks, it is possible to serialize blocks of code + * that might otherwise run in parallel on multiple CPU cores, + * or be interleaved due to interrupts or scheduling. + * + * \ingroup sync + * + * Synchronization is implemented using a lock variable. Once a thread enters + * the critical area, it sets the lock variable (to a non-zero value); when + * this thread leaves the critical area, it resets the lock variable to zero. + * Threads trying to enter an already locked critical area, actively wait, + * continuously checking until the critical area is free again. + * + * Use the following two GCC intrinsics + * - `bool __atomic_test_and_set(void *ptr, int memorder)` + * - `void __atomic_clear (bool *ptr, int memorder)` + * + * These intrinsics are translated into atomic, architecture-specific + * CPU instructions. + * + * If you want that things just work, choose __ATOMIC_SEQ_CST as memorder. + * This is not the most efficient memory order but works reasonably well. 
+ * + * Atomic + * Builtins in GCC manual + */ +class Spinlock { + // Prevent copies and assignments + Spinlock(const Spinlock& copy) = delete; + Spinlock& operator=(const Spinlock&) = delete; + + public: + /*! \brief Constructor; Initializes as unlocked. + * + * \todo(12) Complete Constructor (for \MPStuBS, or use \ref Ticketlock) + * + */ + consteval Spinlock() {} + + /*! \brief Enters the critical area. In case the area is already locked, + * \ref lock() will actively wait until the area can be entered. + * + * \see \ref Core::pause() + * \todo(12) Implement Method (for \MPStuBS, or use \ref Ticketlock) + */ + void lock() {} + + /*! \brief Unblocks the critical area. + * + * \todo(12) Implement Method (for \MPStuBS, or use \ref Ticketlock) + */ + void unlock() {} +}; diff --git a/sync/ticketlock.h b/sync/ticketlock.h new file mode 100644 index 0000000..5414ebe --- /dev/null +++ b/sync/ticketlock.h @@ -0,0 +1,52 @@ +/*! \file + * \brief Contains the class Ticketlock + */ + +#pragma once + +/*! \brief Using Ticketlocks, it is possible to serialize blocks of code + * that might otherwise run in parallel on multiple CPU cores, + * or be interleaved due to interrupts or scheduling. + * + * \ingroup sync + * + * Synchronization is implemented using a lock and a ticket variable. + * Once a thread tries to enter the critical area, it obtains a ticket by + * atomically incrementing the ticket variable and waiting until the lock + * counter reaches this ticket, if it is not there already. + * When a thread leaves the critical area, it increments the lock variable by + * one and thereby allows the next thread to enter the critical area. + * + * If you want that things just work, choose __ATOMIC_SEQ_CST as memorder. + * This is not the most efficient memory order but works reasonably well. 
+ * + * Atomic + * Builtins in GCC manual + */ +class Ticketlock { + // Prevent copies and assignments + Ticketlock(const Ticketlock& copy) = delete; + Ticketlock& operator=(const Ticketlock&) = delete; + + public: + /*! \brief Constructor + * + * \todo(12) Complete Constructor (for \MPStuBS) + */ + consteval Ticketlock() {} + + /*! \brief Enters the critical area. In case the area is already locked, + * \ref lock() will actively wait until the area can be entered. + * + * \see \ref Core::pause() + * \todo(12) Implement Method (for \MPStuBS) + */ + void lock() {} + + /*! \brief Unblocks the critical area. + * + * \todo(12) Implement Method (for \MPStuBS) + */ + void unlock() {} +}; diff --git a/thread/dispatcher.cc b/thread/dispatcher.cc new file mode 100644 index 0000000..5044a50 --- /dev/null +++ b/thread/dispatcher.cc @@ -0,0 +1,17 @@ +// vim: set noet ts=4 sw=4: + +#include "dispatcher.h" + +Dispatcher::Dispatcher() {} + +Thread *Dispatcher::active() { return nullptr; } + +bool Dispatcher::isActive(const Thread *thread, unsigned *cpu) { + (void)thread; + (void)cpu; + return false; +} + +void Dispatcher::go(Thread *first) { (void)first; } + +void Dispatcher::dispatch(Thread *next) { (void)next; } diff --git a/thread/dispatcher.h b/thread/dispatcher.h new file mode 100644 index 0000000..481740e --- /dev/null +++ b/thread/dispatcher.h @@ -0,0 +1,66 @@ +// vim: set noet ts=4 sw=4: + +/*! \file + * \brief \ref Dispatcher for \ref Thread threads + */ +#pragma once +#include "../thread/thread.h" +#include "../types.h" + +/*! \brief The dispatcher dispatches threads and puts the scheduler's + * decisions into action. + * \ingroup thread + * + * The dispatcher manages the life pointer that refers to the currently + * active thread and performs the actual switching of processes. + * For single-core systems, a single life pointer is sufficient, as only a + * single thread can be active at any one time. On multi-core systems, + * every CPU core needs its own life pointer. 
+ */ +class Dispatcher { + /*! \brief set the currently active thread + * \param thread active Thread + */ + void setActive(Thread* thread) { (void)thread; } + + public: + /*! \brief constructor + * + * \todo(14) Implement Method + */ + Dispatcher(); + + /*! \brief Returns the thread currently running on the CPU core calling + * this method + * + * \todo(14) Implement Method + */ + Thread* active(); + + /*! \brief Check if thread is currently active + * \param thread Pointer to the thread in question + * \param cpu will receive the core number if `cpu` pointer is not `nullptr` + * and the thread is currently active + * \return `true` if currently running, false otherwise + * + * \todo(15) Implement method for kill IPI (in \MPStuBS only) + */ + bool isActive(const Thread* thread, unsigned* cpu = nullptr); + + /*! \brief This method stores first as life pointer for this CPU core and + * triggers the execution of first. Only to be used for the first thread + * running on a CPU. + * \param first First thread to be executed on this CPU core. + * + * \todo(14) Implement Method + */ + void go(Thread* first); + + /*! \brief Updates the life pointer to next and issues a thread change from + * the old to the new life pointer. + * \param next Next thread to be executed. + * + * \todo(14) Implement Method + */ + void dispatch(Thread* next); +}; diff --git a/thread/idlethread.cc b/thread/idlethread.cc new file mode 100644 index 0000000..2d235c8 --- /dev/null +++ b/thread/idlethread.cc @@ -0,0 +1,3 @@ +#include "idlethread.h" + +void IdleThread::action() {} diff --git a/thread/idlethread.h b/thread/idlethread.h new file mode 100644 index 0000000..84ba023 --- /dev/null +++ b/thread/idlethread.h @@ -0,0 +1,27 @@ +/*! \file + * \brief \ref IdleThread executed by the \ref Scheduler if no other \ref + * Thread is ready + */ + +#pragma once +#include "../types.h" +#include "thread.h" + +/*! \brief Thread that is executed when there is nothing to do for this core. 
+ * \ingroup thread + * + * Using the IdleThread simplifies the idea of waiting and is an answer to the + * questions that arise once the ready queue is empty. + * + * \note Instance of this class should *never* be inserted into the scheduler's + * ready queue, as the IdleThread should only be executed if there is no + * proper work to do. + */ +class IdleThread : public Thread { + public: + /*! \brief Wait for a thread to become ready and sleep in the meantime. + * + * \todo(16) Implement Method + */ + void action() override; +}; diff --git a/thread/scheduler.cc b/thread/scheduler.cc new file mode 100644 index 0000000..8a6f78c --- /dev/null +++ b/thread/scheduler.cc @@ -0,0 +1,27 @@ +// vim: set noet ts=4 sw=4: + +#include "scheduler.h" + +Scheduler::Scheduler() {} + +Thread* Scheduler::getNext() { return nullptr; } + +void Scheduler::schedule() {} + +void Scheduler::ready(Thread* that) { (void)that; } + +void Scheduler::resume(bool ready) { (void)ready; } + +void Scheduler::exit() {} + +void Scheduler::kill(Thread* that) { (void)that; } + +bool Scheduler::isActive(const Thread* that, unsigned int* cpu) { + (void)that; + (void)cpu; + return false; +} + +bool Scheduler::isEmpty() const { return false; } + +void Scheduler::setIdle(IdleThread* that) { (void)that; } diff --git a/thread/scheduler.h b/thread/scheduler.h new file mode 100644 index 0000000..ece07bc --- /dev/null +++ b/thread/scheduler.h @@ -0,0 +1,120 @@ +// vim: set noet ts=4 sw=4: + +/*! \file + * + * \brief \ref Scheduler to manage the \ref Thread "threads" + */ + +#pragma once +#include "../types.h" +#include "dispatcher.h" +#include "idlethread.h" +#include "thread.h" + +/*! \brief The scheduler plans the threads' execution order and, from this, + * selects the next thread to be running. + * \ingroup thread + * + * The scheduler manages the ready queue (a private \ref Queue object), + * that is the list of threads that are ready to execute. 
The scheduler + * arranges threads in a FIFO order, that is, when a thread is set ready, it + * will be appended to the end of the queue, while threads to be executed are + * taken from the front of the queue. + */ +class Scheduler { + /*! \brief a Dispatcher object, providing the low level context switching + * routines. + */ + Dispatcher dispatcher; + + /*! \brief Helper to retrieve next Thread + * \return pointer of next thread + */ + Thread* getNext(); + + public: + Scheduler(); + + /*! \brief Start scheduling + * + * This method starts the scheduling by removing the first thread from + * the ready queue and activating it. \MPStuBS needs to call this method + * once for every CPU core to dispatch the first thread. + * + * \todo(14) Implement Method + */ + void schedule(); + + /*! \brief Include a thread in scheduling decisions. + * + * This method will register a thread for scheduling. It will be appended + * to the ready queue and dispatched once its time has come. + * \param that \ref Thread to be scheduled + * + * \todo(14) Implement Method + */ + void ready(Thread* that); + + /*! \brief (Self-)termination of the calling thread. + * + * This method can be used by a thread to exit itself. The calling + * thread will not be appended to the ready queue; a reschedule will be + * issued. + * + * \todo(14) Implement Method + */ + void exit(); + + /*! \brief Kills the passed thread + * + * This method is used to kill the \ref Thread `that`. + * For \OOStuBS, it is sufficient to remove `that` from the ready queue + * and, thereby, exclude the thread from scheduling. + * For \MPStuBS, a simple removal is not sufficient, as the thread might + * currently be running on another CPU core. In this case, the thread needs + * to be marked as *dying* (a flag checked by resume prior to enqueuing + * into the ready queue) + */ + /*! + * Note: The thread should be able to kill itself. 
+ * + * \todo(14) Implement Method + * + * \todo(15) Adapt method (for MPStuBS) + */ + void kill(Thread* that); + + /*! \brief Issue a thread change + * + * This method issues the change of the currently active thread without + * requiring the calling thread to be aware of the other threads. + * Scheduling decisions, i.e. which thread will be run next, are made by + * the scheduler itself with the knowledge of the currently ready threads. + * The currently active thread is appended to the end of the queue; the + * first thread in the queue will be activated (to implement the FIFO + * policy). + * + * \todo(14) Implement Method + */ + void resume(bool ready = true); + + /*! \brief return the active thread from the dispatcher + */ + Thread* active() { return dispatcher.active(); } + + /// \copydoc Dispatcher::isActive + bool isActive(const Thread* that, unsigned int* cpu = nullptr); + + /*! \brief Checks whether the ready queue is empty. + * + * \todo(16) Implement Method + */ + bool isEmpty() const; + + /*! \brief Set the idle thread for the executing CPU + * \param[in] that the idle thread to use for the executing CPU + * + * \todo(16) Implement Method + */ + void setIdle(IdleThread* that); +}; diff --git a/thread/thread.cc b/thread/thread.cc new file mode 100644 index 0000000..1fa8f6f --- /dev/null +++ b/thread/thread.cc @@ -0,0 +1,16 @@ +// vim: set noet ts=4 sw=4: + +#include "thread.h" + +// Alias to simplify stuff +typedef void (*kickoff_t)(void*); + +void Thread::kickoff(Thread* object) { (void)object; } + +Thread::Thread(void* tos) { (void)tos; } + +void Thread::resume(Thread* next) { (void)next; } + +void Thread::go() {} + +void Thread::action() {} diff --git a/thread/thread.h b/thread/thread.h new file mode 100644 index 0000000..5eda83f --- /dev/null +++ b/thread/thread.h @@ -0,0 +1,86 @@ +// vim: set noet ts=4 sw=4: + +/*! \file + * \brief \ref Thread abstraction required for multithreading + */ + +/*! 
\defgroup thread Multithreading + * \brief The Multithreading Subsystem + * + * The group Multithreading contains all elements that form the foundation + * of CPU multiplexing. This module's objective is to provide the abstraction + * thread that provides a virtualised CPU for the user's applications. + */ + +#pragma once +#include "../arch/context.h" +#include "../object/queue.h" +#include "../types.h" + +/*! \brief The Thread is an object used by the scheduler. + * \ingroup thread + */ +class Thread { + protected: + /*! \brief The thread's entry point. + * + * For the first activation of a thread, we need a "return address" + * pointing to a function that will take care of calling C++ virtual + * methods (e.g. \ref action()), based on the thread object pointer. + * For this purpose, we use this `kickoff()` function. + * + * \note As this function is never actually called, but only executed by + * returning from the co-routine's initial stack, it may never return. + * Otherwise garbage values from the stack will be interpreted as + * return address and the system might crash. + * + * \param object Thread to be started + */ + /// \todo(14) Implement Method + static void kickoff(Thread* object); + + public: + /*! \brief Marker for a dying thread + */ + volatile bool kill_flag; + + /*! \brief Constructor + * Initializes the context using \ref prepareContext with the given stack + * space. + * + * \param tos the top of stack, highest address of some memory block that + * should be used as stack (remember stacks grow to the lower addresses on + * x86). + * + * \todo(14) Implement constructor + */ + explicit Thread(void* tos); + + /*! \brief Activates the first thread on this CPU. + * + * Calling the method starts the first thread on the calling CPU. + * From then on, \ref Thread::resume() must be used for all subsequent + * context switches. + * + * \todo(14) Implement Method + */ + void go(); + + /*! \brief Switches from the currently running thread to the `next` one. 
+ * + * The values currently present in the callee-saved registers will be + * stored in this threads context-structure, the corresponding values + * belonging to `next` thread will be loaded. + * \param next Pointer to the next thread. + * + * \todo(14) Implement Method + */ + void resume(Thread* next); + + /*! \brief Method that contains the thread's program code. + * + * Derived classes are meant to override this method to provide + * meaningful code to be run in this thread. + */ + virtual void action(); +}; diff --git a/tools/build.mk b/tools/build.mk new file mode 100644 index 0000000..5109602 --- /dev/null +++ b/tools/build.mk @@ -0,0 +1,146 @@ +# Build the kernel + +# Top Level Folder all generated files will be placed in. +ROOTBUILDDIR = build +# Folder the generated files will be placed in. +BUILDDIR ?= $(ROOTBUILDDIR) +# Build folder suffixes +OPTTAG = opt +NOOPTTAG = noopt +DBGTAG = dbg +VERBOSETAG = verbose + +# C++ +CXX ?= $(PREFIX)clang++ +# on macos the default compiler is called c++, switch to clang++ +ifeq ($(shell uname),Darwin) + ifeq ($(CXX),c++) + CXX := clang++ + endif +endif + +CXXFLAGS_ARCH = -m64 +CXXFLAGS_DEFAULT = -std=c++23 -ffreestanding -fno-pic -nodefaultlibs -nostdlib -nostdinc -I. 
-fno-rtti -fno-exceptions -Wno-write-strings -fno-stack-protector -mno-red-zone -g -gdwarf-2 -fno-use-cxa-atexit -no-pie -nostartfiles +# Enable 16-byte compare-and-exchange instruction for debugging purposes (stack alignment) +CXXFLAGS_DEFAULT += -mcx16 +CXXFLAGS_OPT = -O3 -fomit-frame-pointer +CXXFLAGS_WARNING = -Wall -Wextra -Werror -Wno-error=unused-parameter -Wno-error=unused-variable -Wno-non-virtual-dtor +CXXFLAGS_CLANG = -Wno-error=unused-private-field -Wno-implicit-exception-spec-mismatch -Wno-error=unused-const-variable -Wno-unused-command-line-argument -Wno-unused-const-variable -fno-strict-aliasing +ifeq ($(shell uname),Darwin) + CXXFLAGS_CLANG += -target x86_64-pc-linux-gnu -fuse-ld=lld +endif +CXXFLAGS_GCC = -fno-tree-loop-distribute-patterns -Wstack-usage=4096 -Wno-error=stack-usage= +CXXFLAGS_NOFPU = -mno-mmx -mno-sse -mgeneral-regs-only +CXXFLAGS = $(CXXFLAGS_ARCH) $(CXXFLAGS_DEFAULT) $(CXXFLAGS_OPT) $(CXXFLAGS_NOFPU) $(CXXFLAGS_WARNING) +# Compiler specific flags +ifneq (,$(findstring clang,$(CXX))) + COMPILER := CLANG + CXXFLAGS += $(CXXFLAGS_CLANG) +else ifneq (,$(findstring g++,$(CXX))) + COMPILER := GCC + # g++ 6 does not support general-regs-only flag + ifeq "$(shell expr `$(CXX) -dumpversion | cut -f1 -d.` \<= 6)" "1" + CXXFLAGS := $(filter-out -mgeneral-regs-only,$(CXXFLAGS)) + endif + # Configures -Warray-bounds warning to allow accessing in the NULL page + # Necessary to read the Extended BIOS Data Area at 0x40e + ifeq "$(shell expr `$(CXX) -dumpversion | cut -f1 -d.` \>= 12)" "1" + CXXFLAGS += --param=min-pagesize=0x40e + endif + CXXFLAGS += $(CXXFLAGS_GCC) +else + COMPILER := +endif + +# Assembly +ASM = nasm +ASMFLAGS = -f elf64 + +# Additional build utilities +# If llvm tools are available, use them +ifneq (,$(shell which llvm-objcopy 2>/dev/null)) + PREFIX = llvm- +endif +OBJCOPY = $(PREFIX)objcopy +STRIP = $(PREFIX)strip +AR = $(PREFIX)ar + +# Subdirectories with sources +VPATH = $(sort $(dir $(CC_SOURCES) $(ASM_SOURCES))) + +# Lists 
of object files that are generated by compilation: +# Note that the variables containing the input files are to be defined by +# the Makefiles prior to including this common.mk. +CC_OBJECTS = $(addprefix $(BUILDDIR)/,$(CC_SOURCES:.cc=.o)) +DEP_FILES = $(addprefix $(BUILDDIR)/,$(CC_SOURCES:.cc=.d) $(addsuffix .d,$(ASM_SOURCES))) +ASM_OBJECTS = $(addprefix $(BUILDDIR)/,$(addsuffix .o,$(ASM_SOURCES))) + +# Dependency files +$(BUILDDIR)/%.d : %.cc $(MAKEFILE_LIST) + @echo "DEP $<" + @mkdir -p $(@D) + $(VERBOSE) $(CXX) $(CXXFLAGS) -MM -MT $(BUILDDIR)/$*.o -MF $@ $< + +$(BUILDDIR)/%.asm.d : %.asm $(MAKEFILE_LIST) + @echo "DEP $<" + @mkdir -p $(@D) + $(VERBOSE) $(ASM) $(ASMFLAGS) -M -MT $(BUILDDIR)/$*.asm.o -MF $@ $< + +# Object files +$(BUILDDIR)/%.o : %.cc $(MAKEFILE_LIST) + @echo "CXX $<" + @mkdir -p $(@D) + $(VERBOSE) $(CXX) -c $(CXXFLAGS) -o $@ $< + +$(BUILDDIR)/%.asm.o : %.asm $(MAKEFILE_LIST) + @echo "ASM $<" + @mkdir -p $(@D) + $(VERBOSE) $(ASM) $(ASMFLAGS) -o $@ $< + +# The standard target 'clean' removes the whole generated system, the object files, and the dependency files. +clean:: + rm -rf "$(BUILDDIR)" + +# Target issuing a nested call to make generating a fully optimized system without assertions. +%-$(OPTTAG): + $(VERBOSE) $(MAKE) BUILDDIR="$(BUILDDIR)/$(OPTTAG)" ISODIR="$(ISODIR)/$(OPTTAG)" CXXFLAGS_OPT="-Ofast -fomit-frame-pointer -flto -march=westmere -DNDEBUG" $* + +# Target issuing a nested call to make generating a non-optimized system. +%-$(NOOPTTAG): + $(VERBOSE) $(MAKE) BUILDDIR="$(BUILDDIR)/$(NOOPTTAG)" ISODIR="$(ISODIR)/$(NOOPTTAG)" CXXFLAGS_OPT="-O0" $* + +# Target issuing a nested call to make generating a system optimized for debugging. +%-$(DBGTAG): + $(VERBOSE) $(MAKE) BUILDDIR="$(BUILDDIR)/$(DBGTAG)" ISODIR="$(ISODIR)/$(DBGTAG)" CXXFLAGS_OPT="-Og -fno-omit-frame-pointer" $* + +# Target issuing a nested call to make generating a system with verbose output. 
+%-$(VERBOSETAG): + $(VERBOSE) $(MAKE) BUILDDIR="$(BUILDDIR)/$(VERBOSETAG)" ISODIR="$(ISODIR)/$(VERBOSETAG)" CXXFLAGS_OPT="-DVERBOSE" $* + +# Documentation +help:: + @$(echo) "" \ + "All targets exist in different flavours in addition to \e[2;3m\e[0m:\n" \ + "\e[2;3m\e[0;3m-noopt\e[0m, \e[2;3m\e[0;3m-opt\e[0m, \e[2;3m\e[0;3m-dbg\e[0m, and \e[2;3m\e[0;3m-verbose\e[0m.\n" \ + "Targets suffixed with \e[3m-noopt\e[0m are compiled without optimizations,\n" \ + "\e[3m-opt\e[0m targets produce a highly optimized binary, while\n" \ + "\e[3m-dbg\e[0m targets only use optimizations not hindering debugging.\n" \ + "Targets suffixed with \e[3m-verbose\e[0m generate binaries including\n" \ + "verbose output (via \e[3mDBG_VERBOSE\e[0m), making such targets useful for debugging.\n" \ + "To get a verbose make output, clear VERBOSE, e.g. \e[3mmake VERBOSE=\e[0m.\n" \ + "The following targets are available (each target can be suffixed by \e[3m-noopt\e[0m\n" \ + "and \e[3m-verbose\e[0m):\n\n" \ + " \e[3mall\e[0m Builds $(PROJECT), generating an ELF binary\n\n" + +# Print warnings, if appropriate +ifeq (,$(COMPILER)) +$(warning Unknown (and potentially unsupported) compiler "$(CXX)"!) 
+endif + +# Include dependency files (generated via gcc flag -MM) +ifneq ($(MAKECMDGOALS),clean) +-include $(DEP_FILES) +endif + +# Phony targets +.PHONY: clean help diff --git a/tools/common.mk b/tools/common.mk new file mode 100644 index 0000000..acbb347 --- /dev/null +++ b/tools/common.mk @@ -0,0 +1,33 @@ +# Common include Makefile + +# Hide commands +VERBOSE = @ +# Prefix for toolchain binaries +PREFIX ?= +# Project name +PROJECT ?= "MPStuBS" + +help:: + @$(echo) "\n" \ + "\e[1mMAKEFILE for the teaching operating system $(PROJECT)\e[0m\n" \ + "--------------------------------------------------\n\n" \ + "Executing '\e[4mmake\e[0m' will compile the operating system from source.\n" + +# Get current directory path +CURRENT_DIR := $(dir $(lastword $(MAKEFILE_LIST))) + +# Include Makefile scripts +include $(CURRENT_DIR)/build.mk +include $(CURRENT_DIR)/qemu.mk +include $(CURRENT_DIR)/image.mk +include $(CURRENT_DIR)/linter.mk +include $(CURRENT_DIR)/remote.mk + +# Disable builtin rules +MAKEFLAGS += --no-builtin-rules + +# Do not print "Entering/Leaving directory" messages (e.g. for nested make calls) +MAKEFLAGS += --no-print-directory + +# Disable builtin suffixes +.SUFFIXES: diff --git a/tools/cpplint.py b/tools/cpplint.py new file mode 100644 index 0000000..a004746 --- /dev/null +++ b/tools/cpplint.py @@ -0,0 +1,6771 @@ +#!/usr/bin/env python +# +# Copyright (c) 2009 Google Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. 
nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Does google-lint on c++ files. + +The goal of this script is to identify places in the code that *may* +be in non-compliance with google style. It does not attempt to fix +up these problems -- the point is to educate. It does also not +attempt to find all problems, or to ensure that everything it does +find is legitimately a problem. + +In particular, we can get very confused by /* and // inside strings! +We do a small hack, which is to ignore //'s with "'s after them on the +same line, but it is far from perfect (in either direction). 
+""" + +import codecs +import copy +import getopt +import glob +import itertools +import math # for log +import os +import re +import sre_compile +import string +import sys +import sysconfig +import unicodedata +import xml.etree.ElementTree + +# if empty, use defaults +_valid_extensions = set([]) + +__VERSION__ = '1.5.2' + +try: + xrange # Python 2 +except NameError: + # -- pylint: disable=redefined-builtin + xrange = range # Python 3 + +_USAGE = """ +Syntax: cpplint.py [--verbose=#] [--output=emacs|eclipse|vs7|junit|sed|gsed] + [--filter=-x,+y,...] + [--counting=total|toplevel|detailed] [--root=subdir] + [--repository=path] + [--linelength=digits] [--headers=x,y,...] + [--recursive] + [--exclude=path] + [--extensions=hpp,cpp,...] + [--includeorder=default|standardcfirst] + [--quiet] + [--version] + [file] ... + + Style checker for C/C++ source files. + This is a fork of the Google style checker with minor extensions. + + The style guidelines this tries to follow are those in + https://google.github.io/styleguide/cppguide.html + + Every problem is given a confidence score from 1-5, with 5 meaning we are + certain of the problem, and 1 meaning it could be a legitimate construct. + This will miss some errors, and is not a substitute for a code review. + + To suppress false-positive errors of a certain category, add a + 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*) + suppresses errors of all categories on that line. + + The files passed in will be linted; at least one file must be provided. + Default linted extensions are %s. + Other file types will be ignored. + Change the extensions with the --extensions flag. + + Flags: + + output=emacs|eclipse|vs7|junit|sed|gsed + By default, the output is formatted to ease emacs parsing. Visual Studio + compatible output (vs7) may also be used. Further support exists for + eclipse (eclipse), and JUnit (junit). XML parsers such as those used + in Jenkins and Bamboo may also be used. 
+ The sed format outputs sed commands that should fix some of the errors. + Note that this requires gnu sed. If that is installed as gsed on your + system (common e.g. on macOS with homebrew) you can use the gsed output + format. Sed commands are written to stdout, not stderr, so you should be + able to pipe output straight to a shell to run the fixes. + + verbose=# + Specify a number 0-5 to restrict errors to certain verbosity levels. + Errors with lower verbosity levels have lower confidence and are more + likely to be false positives. + + quiet + Don't print anything if no errors are found. + + filter=-x,+y,... + Specify a comma-separated list of category-filters to apply: only + error messages whose category names pass the filters will be printed. + (Category names are printed with the message and look like + "[whitespace/indent]".) Filters are evaluated left to right. + "-FOO" and "FOO" means "do not print categories that start with FOO". + "+FOO" means "do print categories that start with FOO". + + Examples: --filter=-whitespace,+whitespace/braces + --filter=whitespace,runtime/printf,+runtime/printf_format + --filter=-,+build/include_what_you_use + + To see a list of all the categories used in cpplint, pass no arg: + --filter= + + counting=total|toplevel|detailed + The total number of errors found is always printed. If + 'toplevel' is provided, then the count of errors in each of + the top-level categories like 'build' and 'whitespace' will + also be printed. If 'detailed' is provided, then a count + is provided for each category like 'build/class'. + + repository=path + The top level directory of the repository, used to derive the header + guard CPP variable. By default, this is determined by searching for a + path that contains .git, .hg, or .svn. When this flag is specified, the + given path is used instead. 
This option allows the header guard CPP + variable to remain consistent even if members of a team have different + repository root directories (such as when checking out a subdirectory + with SVN). In addition, users of non-mainstream version control systems + can use this flag to ensure readable header guard CPP variables. + + Examples: + Assuming that Alice checks out ProjectName and Bob checks out + ProjectName/trunk and trunk contains src/chrome/ui/browser.h, then + with no --repository flag, the header guard CPP variable will be: + + Alice => TRUNK_SRC_CHROME_BROWSER_UI_BROWSER_H_ + Bob => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + If Alice uses the --repository=trunk flag and Bob omits the flag or + uses --repository=. then the header guard CPP variable will be: + + Alice => SRC_CHROME_BROWSER_UI_BROWSER_H_ + Bob => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + root=subdir + The root directory used for deriving header guard CPP variable. + This directory is relative to the top level directory of the repository + which by default is determined by searching for a directory that contains + .git, .hg, or .svn but can also be controlled with the --repository flag. + If the specified directory does not exist, this flag is ignored. + + Examples: + Assuming that src is the top level directory of the repository (and + cwd=top/src), the header guard CPP variables for + src/chrome/browser/ui/browser.h are: + + No flag => CHROME_BROWSER_UI_BROWSER_H_ + --root=chrome => BROWSER_UI_BROWSER_H_ + --root=chrome/browser => UI_BROWSER_H_ + --root=.. => SRC_CHROME_BROWSER_UI_BROWSER_H_ + + linelength=digits + This is the allowed line length for the project. The default value is + 80 characters. + + Examples: + --linelength=120 + + recursive + Search for files to lint recursively. Each directory given in the list + of files to be linted is replaced by all files that descend from that + directory. Files with extensions not in the valid extensions list are + excluded. 
+ + exclude=path + Exclude the given path from the list of files to be linted. Relative + paths are evaluated relative to the current directory and shell globbing + is performed. This flag can be provided multiple times to exclude + multiple files. + + Examples: + --exclude=one.cc + --exclude=src/*.cc + --exclude=src/*.cc --exclude=test/*.cc + + extensions=extension,extension,... + The allowed file extensions that cpplint will check + + Examples: + --extensions=%s + + includeorder=default|standardcfirst + For the build/include_order rule, the default is to blindly assume angle + bracket includes with file extension are c-system-headers (default), + even knowing this will have false classifications. + The default is established at google. + standardcfirst means to instead use an allow-list of known c headers and + treat all others as separate group of "other system headers". The C headers + included are those of the C-standard lib and closely related ones. + + headers=x,y,... + The header extensions that cpplint will treat as .h in checks. Values are + automatically added to --extensions list. + (by default, only files with extensions %s will be assumed to be headers) + + Examples: + --headers=%s + --headers=hpp,hxx + --headers=hpp + + cpplint.py supports per-directory configurations specified in CPPLINT.cfg + files. CPPLINT.cfg file can contain a number of key=value pairs. + Currently the following options are supported: + + set noparent + filter=+filter1,-filter2,... + exclude_files=regex + linelength=80 + root=subdir + headers=x,y,... + + "set noparent" option prevents cpplint from traversing directory tree + upwards looking for more .cfg files in parent directories. This option + is usually placed in the top-level project directory. + + The "filter" option is similar in function to --filter flag. It specifies + message filters in addition to the |_DEFAULT_FILTERS| and those specified + through --filter command-line flag. 
+ + "exclude_files" allows to specify a regular expression to be matched against + a file name. If the expression matches, the file is skipped and not run + through the linter. + + "linelength" allows to specify the allowed line length for the project. + + The "root" option is similar in function to the --root flag (see example + above). Paths are relative to the directory of the CPPLINT.cfg. + + The "headers" option is similar in function to the --headers flag + (see example above). + + CPPLINT.cfg has an effect on files in the same directory and all + sub-directories, unless overridden by a nested configuration file. + + Example file: + filter=-build/include_order,+build/include_alpha + exclude_files=.*\\.cc + + The above example disables build/include_order warning and enables + build/include_alpha as well as excludes all .cc from being + processed by linter, in the current directory (where the .cfg + file is located) and all sub-directories. +""" + +# We categorize each error message we print. Here are the categories. +# We want an explicit list so we can list them all in cpplint --filter=. +# If you add a new error message with a new category, add it to the list +# here! cpplint_unittest.py should tell you if you forget to do this. 
+_ERROR_CATEGORIES = [ + 'build/class', + 'build/c++11', + 'build/c++14', + 'build/c++tr1', + 'build/deprecated', + 'build/endif_comment', + 'build/explicit_make_pair', + 'build/forward_decl', + 'build/header_guard', + 'build/include', + 'build/include_subdir', + 'build/include_alpha', + 'build/include_order', + 'build/include_what_you_use', + 'build/namespaces_headers', + 'build/namespaces_literals', + 'build/namespaces', + 'build/printf_format', + 'build/storage_class', + 'legal/copyright', + 'readability/alt_tokens', + 'readability/braces', + 'readability/casting', + 'readability/check', + 'readability/constructors', + 'readability/fn_size', + 'readability/inheritance', + 'readability/multiline_comment', + 'readability/multiline_string', + 'readability/namespace', + 'readability/nolint', + 'readability/nul', + 'readability/strings', + 'readability/todo', + 'readability/utf8', + 'runtime/arrays', + 'runtime/casting', + 'runtime/explicit', + 'runtime/int', + 'runtime/init', + 'runtime/invalid_increment', + 'runtime/member_string_references', + 'runtime/memset', + 'runtime/indentation_namespace', + 'runtime/operator', + 'runtime/printf', + 'runtime/printf_format', + 'runtime/references', + 'runtime/string', + 'runtime/threadsafe_fn', + 'runtime/vlog', + 'whitespace/blank_line', + 'whitespace/braces', + 'whitespace/comma', + 'whitespace/comments', + 'whitespace/empty_conditional_body', + 'whitespace/empty_if_body', + 'whitespace/empty_loop_body', + 'whitespace/end_of_line', + 'whitespace/ending_newline', + 'whitespace/forcolon', + 'whitespace/indent', + 'whitespace/line_length', + 'whitespace/newline', + 'whitespace/operators', + 'whitespace/parens', + 'whitespace/semicolon', + 'whitespace/tab', + 'whitespace/todo', + ] + +# These error categories are no longer enforced by cpplint, but for backwards- +# compatibility they may still appear in NOLINT comments. 
+_LEGACY_ERROR_CATEGORIES = [ + 'readability/streams', + 'readability/function', + ] + +# The default state of the category filter. This is overridden by the --filter= +# flag. By default all errors are on, so only add here categories that should be +# off by default (i.e., categories that must be enabled by the --filter= flags). +# All entries here should start with a '-' or '+', as in the --filter= flag. +_DEFAULT_FILTERS = ['-build/include_alpha'] + +# The default list of categories suppressed for C (not C++) files. +_DEFAULT_C_SUPPRESSED_CATEGORIES = [ + 'readability/casting', + ] + +# The default list of categories suppressed for Linux Kernel files. +_DEFAULT_KERNEL_SUPPRESSED_CATEGORIES = [ + 'whitespace/tab', + ] + +# We used to check for high-bit characters, but after much discussion we +# decided those were OK, as long as they were in UTF-8 and didn't represent +# hard-coded international strings, which belong in a separate i18n file. + +# C++ headers +_CPP_HEADERS = frozenset([ + # Legacy + 'algobase.h', + 'algo.h', + 'alloc.h', + 'builtinbuf.h', + 'bvector.h', + 'complex.h', + 'defalloc.h', + 'deque.h', + 'editbuf.h', + 'fstream.h', + 'function.h', + 'hash_map', + 'hash_map.h', + 'hash_set', + 'hash_set.h', + 'hashtable.h', + 'heap.h', + 'indstream.h', + 'iomanip.h', + 'iostream.h', + 'istream.h', + 'iterator.h', + 'list.h', + 'map.h', + 'multimap.h', + 'multiset.h', + 'ostream.h', + 'pair.h', + 'parsestream.h', + 'pfstream.h', + 'procbuf.h', + 'pthread_alloc', + 'pthread_alloc.h', + 'rope', + 'rope.h', + 'ropeimpl.h', + 'set.h', + 'slist', + 'slist.h', + 'stack.h', + 'stdiostream.h', + 'stl_alloc.h', + 'stl_relops.h', + 'streambuf.h', + 'stream.h', + 'strfile.h', + 'strstream.h', + 'tempbuf.h', + 'tree.h', + 'type_traits.h', + 'vector.h', + # 17.6.1.2 C++ library headers + 'algorithm', + 'array', + 'atomic', + 'bitset', + 'chrono', + 'codecvt', + 'complex', + 'condition_variable', + 'deque', + 'exception', + 'forward_list', + 'fstream', + 'functional', 
+ 'future', + 'initializer_list', + 'iomanip', + 'ios', + 'iosfwd', + 'iostream', + 'istream', + 'iterator', + 'limits', + 'list', + 'locale', + 'map', + 'memory', + 'mutex', + 'new', + 'numeric', + 'ostream', + 'queue', + 'random', + 'ratio', + 'regex', + 'scoped_allocator', + 'set', + 'sstream', + 'stack', + 'stdexcept', + 'streambuf', + 'string', + 'strstream', + 'system_error', + 'thread', + 'tuple', + 'typeindex', + 'typeinfo', + 'type_traits', + 'unordered_map', + 'unordered_set', + 'utility', + 'valarray', + 'vector', + # 17.6.1.2 C++14 headers + 'shared_mutex', + # 17.6.1.2 C++17 headers + 'any', + 'charconv', + 'codecvt', + 'execution', + 'filesystem', + 'memory_resource', + 'optional', + 'string_view', + 'variant', + # 17.6.1.2 C++ headers for C library facilities + 'cassert', + 'ccomplex', + 'cctype', + 'cerrno', + 'cfenv', + 'cfloat', + 'cinttypes', + 'ciso646', + 'climits', + 'clocale', + 'cmath', + 'csetjmp', + 'csignal', + 'cstdalign', + 'cstdarg', + 'cstdbool', + 'cstddef', + 'cstdint', + 'cstdio', + 'cstdlib', + 'cstring', + 'ctgmath', + 'ctime', + 'cuchar', + 'cwchar', + 'cwctype', + ]) + +# C headers +_C_HEADERS = frozenset([ + # System C headers + 'assert.h', + 'complex.h', + 'ctype.h', + 'errno.h', + 'fenv.h', + 'float.h', + 'inttypes.h', + 'iso646.h', + 'limits.h', + 'locale.h', + 'math.h', + 'setjmp.h', + 'signal.h', + 'stdalign.h', + 'stdarg.h', + 'stdatomic.h', + 'stdbool.h', + 'stddef.h', + 'stdint.h', + 'stdio.h', + 'stdlib.h', + 'stdnoreturn.h', + 'string.h', + 'tgmath.h', + 'threads.h', + 'time.h', + 'uchar.h', + 'wchar.h', + 'wctype.h', + # additional POSIX C headers + 'aio.h', + 'arpa/inet.h', + 'cpio.h', + 'dirent.h', + 'dlfcn.h', + 'fcntl.h', + 'fmtmsg.h', + 'fnmatch.h', + 'ftw.h', + 'glob.h', + 'grp.h', + 'iconv.h', + 'langinfo.h', + 'libgen.h', + 'monetary.h', + 'mqueue.h', + 'ndbm.h', + 'net/if.h', + 'netdb.h', + 'netinet/in.h', + 'netinet/tcp.h', + 'nl_types.h', + 'poll.h', + 'pthread.h', + 'pwd.h', + 'regex.h', + 'sched.h', + 
'search.h', + 'semaphore.h', + 'setjmp.h', + 'signal.h', + 'spawn.h', + 'strings.h', + 'stropts.h', + 'syslog.h', + 'tar.h', + 'termios.h', + 'trace.h', + 'ulimit.h', + 'unistd.h', + 'utime.h', + 'utmpx.h', + 'wordexp.h', + # additional GNUlib headers + 'a.out.h', + 'aliases.h', + 'alloca.h', + 'ar.h', + 'argp.h', + 'argz.h', + 'byteswap.h', + 'crypt.h', + 'endian.h', + 'envz.h', + 'err.h', + 'error.h', + 'execinfo.h', + 'fpu_control.h', + 'fstab.h', + 'fts.h', + 'getopt.h', + 'gshadow.h', + 'ieee754.h', + 'ifaddrs.h', + 'libintl.h', + 'mcheck.h', + 'mntent.h', + 'obstack.h', + 'paths.h', + 'printf.h', + 'pty.h', + 'resolv.h', + 'shadow.h', + 'sysexits.h', + 'ttyent.h', + # Additional linux glibc headers + 'dlfcn.h', + 'elf.h', + 'features.h', + 'gconv.h', + 'gnu-versions.h', + 'lastlog.h', + 'libio.h', + 'link.h', + 'malloc.h', + 'memory.h', + 'netash/ash.h', + 'netatalk/at.h', + 'netax25/ax25.h', + 'neteconet/ec.h', + 'netipx/ipx.h', + 'netiucv/iucv.h', + 'netpacket/packet.h', + 'netrom/netrom.h', + 'netrose/rose.h', + 'nfs/nfs.h', + 'nl_types.h', + 'nss.h', + 're_comp.h', + 'regexp.h', + 'sched.h', + 'sgtty.h', + 'stab.h', + 'stdc-predef.h', + 'stdio_ext.h', + 'syscall.h', + 'termio.h', + 'thread_db.h', + 'ucontext.h', + 'ustat.h', + 'utmp.h', + 'values.h', + 'wait.h', + 'xlocale.h', + # Hardware specific headers + 'arm_neon.h', + 'emmintrin.h', + 'xmmintrin.h', + ]) + +# Folders of C libraries so commonly used in C++, +# that they have parity with standard C libraries. 
+C_STANDARD_HEADER_FOLDERS = frozenset([ + # standard C library + "sys", + # glibc for linux + "arpa", + "asm-generic", + "bits", + "gnu", + "net", + "netinet", + "protocols", + "rpc", + "rpcsvc", + "scsi", + # linux kernel header + "drm", + "linux", + "misc", + "mtd", + "rdma", + "sound", + "video", + "xen", + ]) + +# Type names +_TYPES = re.compile( + r'^(?:' + # [dcl.type.simple] + r'(char(16_t|32_t)?)|wchar_t|' + r'bool|short|int|long|signed|unsigned|float|double|' + # [support.types] + r'(ptrdiff_t|size_t|max_align_t|nullptr_t)|' + # [cstdint.syn] + r'(u?int(_fast|_least)?(8|16|32|64)_t)|' + r'(u?int(max|ptr)_t)|' + r')$') + +# These headers are excluded from [build/include] and [build/include_order] +# checks: +# - Anything not following google file name conventions (containing an +# uppercase character, such as Python.h or nsStringAPI.h, for example). +# - Lua headers. +_THIRD_PARTY_HEADERS_PATTERN = re.compile( + r'^(?:[^/]*[A-Z][^/]*\.h|lua\.h|lauxlib\.h|lualib\.h)$') + +# Pattern for matching FileInfo.BaseName() against test file name +_test_suffixes = ['_test', '_regtest', '_unittest'] +_TEST_FILE_SUFFIX = '(' + '|'.join(_test_suffixes) + r')$' + +# Pattern that matches only complete whitespace, possibly across multiple lines. +_EMPTY_CONDITIONAL_BODY_PATTERN = re.compile(r'^\s*$', re.DOTALL) + +# Assertion macros. These are defined in base/logging.h and +# testing/base/public/gunit.h. 
+_CHECK_MACROS = [ + 'DCHECK', 'CHECK', + 'EXPECT_TRUE', 'ASSERT_TRUE', + 'EXPECT_FALSE', 'ASSERT_FALSE', + ] + +# Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE +_CHECK_REPLACEMENT = dict([(macro_var, {}) for macro_var in _CHECK_MACROS]) + +for op, replacement in [('==', 'EQ'), ('!=', 'NE'), + ('>=', 'GE'), ('>', 'GT'), + ('<=', 'LE'), ('<', 'LT')]: + _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement + _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement + _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement + _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement + +for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), + ('>=', 'LT'), ('>', 'LE'), + ('<=', 'GT'), ('<', 'GE')]: + _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement + _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement + +# Alternative tokens and their replacements. For full list, see section 2.5 +# Alternative tokens [lex.digraph] in the C++ standard. +# +# Digraphs (such as '%:') are not included here since it's a mess to +# match those on a word boundary. +_ALT_TOKEN_REPLACEMENT = { + 'and': '&&', + 'bitor': '|', + 'or': '||', + 'xor': '^', + 'compl': '~', + 'bitand': '&', + 'and_eq': '&=', + 'or_eq': '|=', + 'xor_eq': '^=', + 'not': '!', + 'not_eq': '!=' + } + +# Compile regular expression that matches all the above keywords. The "[ =()]" +# bit is meant to avoid matching these keywords outside of boolean expressions. +# +# False positives include C-style multi-line comments and multi-line strings +# but those have always been troublesome for cpplint. +_ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( + r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') + +# These constants define types of headers for use with +# _IncludeState.CheckNextIncludeOrder(). 
+_C_SYS_HEADER = 1 +_CPP_SYS_HEADER = 2 +_OTHER_SYS_HEADER = 3 +_LIKELY_MY_HEADER = 4 +_POSSIBLE_MY_HEADER = 5 +_OTHER_HEADER = 6 + +# These constants define the current inline assembly state +_NO_ASM = 0 # Outside of inline assembly block +_INSIDE_ASM = 1 # Inside inline assembly block +_END_ASM = 2 # Last line of inline assembly block +_BLOCK_ASM = 3 # The whole block is an inline assembly block + +# Match start of assembly blocks +_MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' + r'(?:\s+(volatile|__volatile__))?' + r'\s*[{(]') + +# Match strings that indicate we're working on a C (not C++) file. +_SEARCH_C_FILE = re.compile(r'\b(?:LINT_C_FILE|' + r'vim?:\s*.*(\s*|:)filetype=c(\s*|:|$))') + +# Match string that indicates we're working on a Linux Kernel file. +_SEARCH_KERNEL_FILE = re.compile(r'\b(?:LINT_KERNEL_FILE)') + +# Commands for sed to fix the problem +_SED_FIXUPS = { + 'Remove spaces around =': r's/ = /=/', + 'Remove spaces around !=': r's/ != /!=/', + 'Remove space before ( in if (': r's/if (/if(/', + 'Remove space before ( in for (': r's/for (/for(/', + 'Remove space before ( in while (': r's/while (/while(/', + 'Remove space before ( in switch (': r's/switch (/switch(/', + 'Should have a space between // and comment': r's/\/\//\/\/ /', + 'Missing space before {': r's/\([^ ]\){/\1 {/', + 'Tab found, replace by spaces': r's/\t/ /g', + 'Line ends in whitespace. Consider deleting these extra spaces.': r's/\s*$//', + 'You don\'t need a ; after a }': r's/};/}/', + 'Missing space after ,': r's/,\([^ ]\)/, \1/g', +} + +_regexp_compile_cache = {} + +# {str, set(int)}: a map from error categories to sets of linenumbers +# on which those errors are expected and should be suppressed. +_error_suppressions = {} + +# The root directory used for deriving header guard CPP variable. +# This is set by --root flag. +_root = None +_root_debug = False + +# The top level repository directory. 
If set, _root is calculated relative to +# this directory instead of the directory containing version control artifacts. +# This is set by the --repository flag. +_repository = None + +# Files to exclude from linting. This is set by the --exclude flag. +_excludes = None + +# Whether to suppress PrintInfo messages +_quiet = False + +# The allowed line length of files. +# This is set by --linelength flag. +_line_length = 80 + +# This allows to use different include order rule than default +_include_order = "default" + +try: + unicode +except NameError: + # -- pylint: disable=redefined-builtin + basestring = unicode = str + +try: + long +except NameError: + # -- pylint: disable=redefined-builtin + long = int + +if sys.version_info < (3,): + # -- pylint: disable=no-member + # BINARY_TYPE = str + itervalues = dict.itervalues + iteritems = dict.iteritems +else: + # BINARY_TYPE = bytes + itervalues = dict.values + iteritems = dict.items + +def unicode_escape_decode(x): + if sys.version_info < (3,): + return codecs.unicode_escape_decode(x)[0] + else: + return x + +# Treat all headers starting with 'h' equally: .h, .hpp, .hxx etc. +# This is set by --headers flag. +_hpp_headers = set([]) + +# {str, bool}: a map from error categories to booleans which indicate if the +# category should be suppressed for every line. +_global_error_suppressions = {} + +def ProcessHppHeadersOption(val): + global _hpp_headers + try: + _hpp_headers = {ext.strip() for ext in val.split(',')} + except ValueError: + PrintUsage('Header extensions must be comma separated list.') + +def ProcessIncludeOrderOption(val): + if val is None or val == "default": + pass + elif val == "standardcfirst": + global _include_order + _include_order = val + else: + PrintUsage('Invalid includeorder value %s. 
Expected default|standardcfirst' % (val,)) + +def IsHeaderExtension(file_extension): + return file_extension in GetHeaderExtensions() + +def GetHeaderExtensions(): + if _hpp_headers: + return _hpp_headers + if _valid_extensions: + return {h for h in _valid_extensions if 'h' in h} + return set(['h', 'hh', 'hpp', 'hxx', 'h++', 'cuh']) + +# The allowed extensions for file names +# This is set by --extensions flag +def GetAllExtensions(): + return GetHeaderExtensions().union(_valid_extensions or set( + ['c', 'cc', 'cpp', 'cxx', 'c++', 'cu'])) + +def ProcessExtensionsOption(val): + global _valid_extensions + try: + extensions = [ext.strip() for ext in val.split(',')] + _valid_extensions = set(extensions) + except ValueError: + PrintUsage('Extensions should be a comma-separated list of values;' + 'for example: extensions=hpp,cpp\n' + 'This could not be parsed: "%s"' % (val,)) + +def ParseNolintSuppressions(filename, raw_line, linenum, error): + """Updates the global list of line error-suppressions. + + Parses any NOLINT comments on the current line, updating the global + error_suppressions store. Reports an error if the NOLINT comment + was malformed. + + Args: + filename: str, the name of the input file. + raw_line: str, the line of input text, with comments. + linenum: int, the number of the current line. + error: function, an error handler. 
+ """ + matched = Search(r'\bNOLINT(NEXTLINE)?\b(\([^)]+\))?', raw_line) + if matched: + if matched.group(1): + suppressed_line = linenum + 1 + else: + suppressed_line = linenum + category = matched.group(2) + if category in (None, '(*)'): # => "suppress all" + _error_suppressions.setdefault(None, set()).add(suppressed_line) + else: + if category.startswith('(') and category.endswith(')'): + category = category[1:-1] + if category in _ERROR_CATEGORIES: + _error_suppressions.setdefault(category, set()).add(suppressed_line) + elif category not in _LEGACY_ERROR_CATEGORIES: + error(filename, linenum, 'readability/nolint', 5, + 'Unknown NOLINT error category: %s' % category) + +def ProcessGlobalSuppresions(lines): + """Updates the list of global error suppressions. + + Parses any lint directives in the file that have global effect. + + Args: + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + """ + for line in lines: + if _SEARCH_C_FILE.search(line): + for category in _DEFAULT_C_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + if _SEARCH_KERNEL_FILE.search(line): + for category in _DEFAULT_KERNEL_SUPPRESSED_CATEGORIES: + _global_error_suppressions[category] = True + +def ResetNolintSuppressions(): + """Resets the set of NOLINT suppressions to empty.""" + _error_suppressions.clear() + _global_error_suppressions.clear() + +def IsErrorSuppressedByNolint(category, linenum): + """Returns true if the specified error category is suppressed on this line. + + Consults the global error_suppressions map populated by + ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. + + Args: + category: str, the category of the error. + linenum: int, the current line number. + Returns: + bool, True iff the error should be suppressed due to a NOLINT comment or + global suppression. 
+ """ + return (_global_error_suppressions.get(category, False) or + linenum in _error_suppressions.get(category, set()) or + linenum in _error_suppressions.get(None, set())) + +def Match(pattern, s): + """Matches the string with the pattern, caching the compiled regexp.""" + # The regexp compilation caching is inlined in both Match and Search for + # performance reasons; factoring it out into a separate function turns out + # to be noticeably expensive. + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].match(s) + +def ReplaceAll(pattern, rep, s): + """Replaces instances of pattern in a string with a replacement. + + The compiled regex is kept in a cache shared by Match and Search. + + Args: + pattern: regex pattern + rep: replacement text + s: search string + + Returns: + string with replacements made (or original string if no replacements) + """ + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].sub(rep, s) + +def Search(pattern, s): + """Searches the string for the pattern, caching the compiled regexp.""" + if pattern not in _regexp_compile_cache: + _regexp_compile_cache[pattern] = sre_compile.compile(pattern) + return _regexp_compile_cache[pattern].search(s) + +def _IsSourceExtension(s): + """File extension (excluding dot) matches a source file extension.""" + return s in GetNonHeaderExtensions() + +class _IncludeState(object): + """Tracks line numbers for includes, and the order in which includes appear. + + include_list contains list of lists of (header, line number) pairs. + It's a lists of lists rather than just one flat list to make it + easier to update across preprocessor boundaries. + + Call CheckNextIncludeOrder() once for each header in the file, passing + in the type constants defined above. 
Calls in an illegal order will + raise an _IncludeError with an appropriate error message. + + """ + # self._section will move monotonically through this set. If it ever + # needs to move backwards, CheckNextIncludeOrder will raise an error. + _INITIAL_SECTION = 0 + _MY_H_SECTION = 1 + _C_SECTION = 2 + _CPP_SECTION = 3 + _OTHER_SYS_SECTION = 4 + _OTHER_H_SECTION = 5 + + _TYPE_NAMES = { + _C_SYS_HEADER: 'C system header', + _CPP_SYS_HEADER: 'C++ system header', + _OTHER_SYS_HEADER: 'other system header', + _LIKELY_MY_HEADER: 'header this file implements', + _POSSIBLE_MY_HEADER: 'header this file may implement', + _OTHER_HEADER: 'other header', + } + _SECTION_NAMES = { + _INITIAL_SECTION: "... nothing. (This can't be an error.)", + _MY_H_SECTION: 'a header this file implements', + _C_SECTION: 'C system header', + _CPP_SECTION: 'C++ system header', + _OTHER_SYS_SECTION: 'other system header', + _OTHER_H_SECTION: 'other header', + } + + def __init__(self): + self.include_list = [[]] + self._section = None + self._last_header = None + self.ResetSection('') + + def FindHeader(self, header): + """Check if a header has already been included. + + Args: + header: header to check. + Returns: + Line number of previous occurrence, or -1 if the header has not + been seen before. + """ + for section_list in self.include_list: + for f in section_list: + if f[0] == header: + return f[1] + return -1 + + def ResetSection(self, directive): + """Reset section checking for preprocessor directive. + + Args: + directive: preprocessor directive (e.g. "if", "else"). + """ + # The name of the current section. + self._section = self._INITIAL_SECTION + # The path of last found header. + self._last_header = '' + + # Update list of includes. Note that we never pop from the + # include list. 
+ if directive in ('if', 'ifdef', 'ifndef'): + self.include_list.append([]) + elif directive in ('else', 'elif'): + self.include_list[-1] = [] + + def SetLastHeader(self, header_path): + self._last_header = header_path + + def CanonicalizeAlphabeticalOrder(self, header_path): + """Returns a path canonicalized for alphabetical comparison. + + - replaces "-" with "_" so they both cmp the same. + - removes '-inl' since we don't require them to be after the main header. + - lowercase everything, just in case. + + Args: + header_path: Path to be canonicalized. + + Returns: + Canonicalized path. + """ + return header_path.replace('-inl.h', '.h').replace('-', '_').lower() + + def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): + """Check if a header is in alphabetical order with the previous header. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + header_path: Canonicalized header to be checked. + + Returns: + Returns true if the header is in alphabetical order. + """ + # If previous section is different from current section, _last_header will + # be reset to empty string, so it's always less than current header. + # + # If previous line was a blank line, assume that the headers are + # intentionally sorted the way they are. + if (self._last_header > header_path and + Match(r'^\s*#\s*include\b', clean_lines.elided[linenum - 1])): + return False + return True + + def CheckNextIncludeOrder(self, header_type): + """Returns a non-empty error message if the next header is out of order. + + This function also updates the internal state to be ready to check + the next include. + + Args: + header_type: One of the _XXX_HEADER constants defined above. + + Returns: + The empty string if the header is in the right order, or an + error message describing what's wrong. 
+ + """ + error_message = ('Found %s after %s' % + (self._TYPE_NAMES[header_type], + self._SECTION_NAMES[self._section])) + + last_section = self._section + + if header_type == _C_SYS_HEADER: + if self._section <= self._C_SECTION: + self._section = self._C_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _CPP_SYS_HEADER: + if self._section <= self._CPP_SECTION: + self._section = self._CPP_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _OTHER_SYS_HEADER: + if self._section <= self._OTHER_SYS_SECTION: + self._section = self._OTHER_SYS_SECTION + else: + self._last_header = '' + return error_message + elif header_type == _LIKELY_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + self._section = self._OTHER_H_SECTION + elif header_type == _POSSIBLE_MY_HEADER: + if self._section <= self._MY_H_SECTION: + self._section = self._MY_H_SECTION + else: + # This will always be the fallback because we're not sure + # enough that the header is associated with this file. + self._section = self._OTHER_H_SECTION + else: + assert header_type == _OTHER_HEADER + self._section = self._OTHER_H_SECTION + + if last_section != self._section: + self._last_header = '' + + return '' + +class _CppLintState(object): + """Maintains module-wide state..""" + + def __init__(self): + self.verbose_level = 1 # global setting. + self.error_count = 0 # global count of reported errors + # filters to apply when emitting error messages + self.filters = _DEFAULT_FILTERS[:] + # backup of filter list. Used to restore the state after each file. + self._filters_backup = self.filters[:] + self.counting = 'total' # In what way are we counting errors? + self.errors_by_category = {} # string to int dict storing error counts + self.quiet = False # Suppress non-error messagess? 
+ + # output format: + # "emacs" - format that emacs can parse (default) + # "eclipse" - format that eclipse can parse + # "vs7" - format that Microsoft Visual Studio 7 can parse + # "junit" - format that Jenkins, Bamboo, etc can parse + # "sed" - returns a gnu sed command to fix the problem + # "gsed" - like sed, but names the command gsed, e.g. for macOS homebrew users + self.output_format = 'emacs' + + # For JUnit output, save errors and failures until the end so that they + # can be written into the XML + self._junit_errors = [] + self._junit_failures = [] + + def SetOutputFormat(self, output_format): + """Sets the output format for errors.""" + self.output_format = output_format + + def SetQuiet(self, quiet): + """Sets the module's quiet settings, and returns the previous setting.""" + last_quiet = self.quiet + self.quiet = quiet + return last_quiet + + def SetVerboseLevel(self, level): + """Sets the module's verbosity, and returns the previous setting.""" + last_verbose_level = self.verbose_level + self.verbose_level = level + return last_verbose_level + + def SetCountingStyle(self, counting_style): + """Sets the module's counting options.""" + self.counting = counting_style + + def SetFilters(self, filters): + """Sets the error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "+whitespace/indent"). + Each filter should start with + or -; else we die. + + Raises: + ValueError: The comma-separated filters did not all start with '+' or '-'. + E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" + """ + # Default filters always have less priority than the flag ones. + self.filters = _DEFAULT_FILTERS[:] + self.AddFilters(filters) + + def AddFilters(self, filters): + """ Adds more filters to the existing list of error-message filters. 
""" + for filt in filters.split(','): + clean_filt = filt.strip() + if clean_filt: + self.filters.append(clean_filt) + for filt in self.filters: + if not (filt.startswith('+') or filt.startswith('-')): + raise ValueError('Every filter in --filters must start with + or -' + ' (%s does not)' % filt) + + def BackupFilters(self): + """ Saves the current filter list to backup storage.""" + self._filters_backup = self.filters[:] + + def RestoreFilters(self): + """ Restores filters previously backed up.""" + self.filters = self._filters_backup[:] + + def ResetErrorCounts(self): + """Sets the module's error statistic back to zero.""" + self.error_count = 0 + self.errors_by_category = {} + + def IncrementErrorCount(self, category): + """Bumps the module's error statistic.""" + self.error_count += 1 + if self.counting in ('toplevel', 'detailed'): + if self.counting != 'detailed': + category = category.split('/')[0] + if category not in self.errors_by_category: + self.errors_by_category[category] = 0 + self.errors_by_category[category] += 1 + + def PrintErrorCounts(self): + """Print a summary of errors by category, and the total.""" + for category, count in sorted(iteritems(self.errors_by_category)): + self.PrintInfo('Category \'%s\' errors found: %d\n' % + (category, count)) + if self.error_count > 0: + self.PrintInfo('Total errors found: %d\n' % self.error_count) + + def PrintInfo(self, message): + if not _quiet and self.output_format != 'junit': + sys.stdout.write(message) + + def PrintError(self, message): + if self.output_format == 'junit': + self._junit_errors.append(message) + else: + sys.stderr.write(message) + + def AddJUnitFailure(self, filename, linenum, message, category, confidence): + self._junit_failures.append((filename, linenum, message, category, + confidence)) + + def FormatJUnitXML(self): + num_errors = len(self._junit_errors) + num_failures = len(self._junit_failures) + + testsuite = xml.etree.ElementTree.Element('testsuite') + testsuite.attrib['errors'] 
= str(num_errors) + testsuite.attrib['failures'] = str(num_failures) + testsuite.attrib['name'] = 'cpplint' + + if num_errors == 0 and num_failures == 0: + testsuite.attrib['tests'] = str(1) + xml.etree.ElementTree.SubElement(testsuite, 'testcase', name='passed') + + else: + testsuite.attrib['tests'] = str(num_errors + num_failures) + if num_errors > 0: + testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') + testcase.attrib['name'] = 'errors' + error = xml.etree.ElementTree.SubElement(testcase, 'error') + error.text = '\n'.join(self._junit_errors) + if num_failures > 0: + # Group failures by file + failed_file_order = [] + failures_by_file = {} + for failure in self._junit_failures: + failed_file = failure[0] + if failed_file not in failed_file_order: + failed_file_order.append(failed_file) + failures_by_file[failed_file] = [] + failures_by_file[failed_file].append(failure) + # Create a testcase for each file + for failed_file in failed_file_order: + failures = failures_by_file[failed_file] + testcase = xml.etree.ElementTree.SubElement(testsuite, 'testcase') + testcase.attrib['name'] = failed_file + failure = xml.etree.ElementTree.SubElement(testcase, 'failure') + template = '{0}: {1} [{2}] [{3}]' + texts = [template.format(f[1], f[2], f[3], f[4]) for f in failures] + failure.text = '\n'.join(texts) + + xml_decl = '\n' + return xml_decl + xml.etree.ElementTree.tostring(testsuite, 'utf-8').decode('utf-8') + +_cpplint_state = _CppLintState() + +def _OutputFormat(): + """Gets the module's output format.""" + return _cpplint_state.output_format + +def _SetOutputFormat(output_format): + """Sets the module's output format.""" + _cpplint_state.SetOutputFormat(output_format) + +def _Quiet(): + """Return's the module's quiet setting.""" + return _cpplint_state.quiet + +def _SetQuiet(quiet): + """Set the module's quiet status, and return previous setting.""" + return _cpplint_state.SetQuiet(quiet) + +def _VerboseLevel(): + """Returns the module's verbosity 
setting.""" + return _cpplint_state.verbose_level + +def _SetVerboseLevel(level): + """Sets the module's verbosity, and returns the previous setting.""" + return _cpplint_state.SetVerboseLevel(level) + +def _SetCountingStyle(level): + """Sets the module's counting options.""" + _cpplint_state.SetCountingStyle(level) + +def _Filters(): + """Returns the module's list of output filters, as a list.""" + return _cpplint_state.filters + +def _SetFilters(filters): + """Sets the module's error-message filters. + + These filters are applied when deciding whether to emit a given + error message. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.SetFilters(filters) + +def _AddFilters(filters): + """Adds more filter overrides. + + Unlike _SetFilters, this function does not reset the current list of filters + available. + + Args: + filters: A string of comma-separated filters (eg "whitespace/indent"). + Each filter should start with + or -; else we die. + """ + _cpplint_state.AddFilters(filters) + +def _BackupFilters(): + """ Saves the current filter list to backup storage.""" + _cpplint_state.BackupFilters() + +def _RestoreFilters(): + """ Restores filters previously backed up.""" + _cpplint_state.RestoreFilters() + +class _FunctionState(object): + """Tracks current function name and the number of lines in its body.""" + + _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. + _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. + + def __init__(self): + self.in_a_function = False + self.lines_in_function = 0 + self.current_function = '' + + def Begin(self, function_name): + """Start analyzing function body. + + Args: + function_name: The name of the function being tracked. 
+ """ + self.in_a_function = True + self.lines_in_function = 0 + self.current_function = function_name + + def Count(self): + """Count line in current function body.""" + if self.in_a_function: + self.lines_in_function += 1 + + def Check(self, error, filename, linenum): + """Report if too many lines in function body. + + Args: + error: The function to call with any errors found. + filename: The name of the current file. + linenum: The number of the line to check. + """ + if not self.in_a_function: + return + + if Match(r'T(EST|est)', self.current_function): + base_trigger = self._TEST_TRIGGER + else: + base_trigger = self._NORMAL_TRIGGER + trigger = base_trigger * 2**_VerboseLevel() + + if self.lines_in_function > trigger: + error_level = int(math.log(self.lines_in_function / base_trigger, 2)) + # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... + if error_level > 5: + error_level = 5 + error(filename, linenum, 'readability/fn_size', error_level, + 'Small and focused functions are preferred:' + ' %s has %d non-comment lines' + ' (error triggered by exceeding %d lines).' % ( + self.current_function, self.lines_in_function, trigger)) + + def End(self): + """Stop analyzing function body.""" + self.in_a_function = False + +class _IncludeError(Exception): + """Indicates a problem with the include order in a file.""" + pass + +class FileInfo(object): + """Provides utility functions for filenames. + + FileInfo provides easy access to the components of a file's path + relative to the project root. + """ + + def __init__(self, filename): + self._filename = filename + + def FullName(self): + """Make Windows paths like Unix.""" + return os.path.abspath(self._filename).replace('\\', '/') + + def RepositoryName(self): + r"""FullName after removing the local path to the repository. 
+ + If we have a real absolute path name here we can try to do something smart: + detecting the root of the checkout and truncating /path/to/checkout from + the name so that we get header guards that don't include things like + "C:\\Documents and Settings\\..." or "/home/username/..." in them and thus + people on different computers who have checked the source out to different + locations won't see bogus errors. + """ + fullname = self.FullName() + + if os.path.exists(fullname): + project_dir = os.path.dirname(fullname) + + # If the user specified a repository path, it exists, and the file is + # contained in it, use the specified repository path + if _repository: + repo = FileInfo(_repository).FullName() + root_dir = project_dir + while os.path.exists(root_dir): + # allow case insensitive compare on Windows + if os.path.normcase(root_dir) == os.path.normcase(repo): + return os.path.relpath(fullname, root_dir).replace('\\', '/') + one_up_dir = os.path.dirname(root_dir) + if one_up_dir == root_dir: + break + root_dir = one_up_dir + + if os.path.exists(os.path.join(project_dir, ".svn")): + # If there's a .svn file in the current directory, we recursively look + # up the directory tree for the top of the SVN checkout + root_dir = project_dir + one_up_dir = os.path.dirname(root_dir) + while os.path.exists(os.path.join(one_up_dir, ".svn")): + root_dir = os.path.dirname(root_dir) + one_up_dir = os.path.dirname(one_up_dir) + + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by + # searching up from the current path. 
+ root_dir = current_dir = os.path.dirname(fullname) + while current_dir != os.path.dirname(current_dir): + if (os.path.exists(os.path.join(current_dir, ".git")) or + os.path.exists(os.path.join(current_dir, ".hg")) or + os.path.exists(os.path.join(current_dir, ".svn"))): + root_dir = current_dir + current_dir = os.path.dirname(current_dir) + + if (os.path.exists(os.path.join(root_dir, ".git")) or + os.path.exists(os.path.join(root_dir, ".hg")) or + os.path.exists(os.path.join(root_dir, ".svn"))): + prefix = os.path.commonprefix([root_dir, project_dir]) + return fullname[len(prefix) + 1:] + + # Don't know what to do; header guard warnings may be wrong... + return fullname + + def Split(self): + """Splits the file into the directory, basename, and extension. + + For 'chrome/browser/browser.cc', Split() would + return ('chrome/browser', 'browser', '.cc') + + Returns: + A tuple of (directory, basename, extension). + """ + + googlename = self.RepositoryName() + project, rest = os.path.split(googlename) + return (project,) + os.path.splitext(rest) + + def BaseName(self): + """File base name - text after the final slash, before the final period.""" + return self.Split()[1] + + def Extension(self): + """File extension - text following the final period, includes that period.""" + return self.Split()[2] + + def NoExtension(self): + """File has no source file extension.""" + return '/'.join(self.Split()[0:2]) + + def IsSource(self): + """File has a source file extension.""" + return _IsSourceExtension(self.Extension()[1:]) + +def _ShouldPrintError(category, confidence, linenum): + """If confidence >= verbose, category passes filter and is not suppressed.""" + + # There are three ways we might decide not to print an error message: + # a "NOLINT(category)" comment appears in the source, + # the verbosity level isn't high enough, or the filters filter it out. 
+ if IsErrorSuppressedByNolint(category, linenum): + return False + + if confidence < _cpplint_state.verbose_level: + return False + + is_filtered = False + for one_filter in _Filters(): + if one_filter.startswith('-'): + if category.startswith(one_filter[1:]): + is_filtered = True + elif one_filter.startswith('+'): + if category.startswith(one_filter[1:]): + is_filtered = False + else: + assert False # should have been checked for in SetFilter. + if is_filtered: + return False + + return True + +def Error(filename, linenum, category, confidence, message): + """Logs the fact we've found a lint error. + + We log where the error was found, and also our confidence in the error, + that is, how certain we are this is a legitimate style regression, and + not a misidentification or a use that's sometimes justified. + + False positives can be suppressed by the use of + "cpplint(category)" comments on the offending line. These are + parsed into _error_suppressions. + + Args: + filename: The name of the file containing the error. + linenum: The number of the line containing the error. + category: A string used to describe the "category" this bug + falls under: "whitespace", say, or "runtime". Categories + may have a hierarchy separated by slashes: "whitespace/indent". + confidence: A number from 1-5 representing a confidence score for + the error, with 5 meaning that we are certain of the problem, + and 1 meaning that it could be a legitimate construct. + message: The error message. 
+ """ + if _ShouldPrintError(category, confidence, linenum): + _cpplint_state.IncrementErrorCount(category) + if _cpplint_state.output_format == 'vs7': + _cpplint_state.PrintError('%s(%s): error cpplint: [%s] %s [%d]\n' % ( + filename, linenum, category, message, confidence)) + elif _cpplint_state.output_format == 'eclipse': + sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + elif _cpplint_state.output_format == 'junit': + _cpplint_state.AddJUnitFailure(filename, linenum, message, category, + confidence) + elif _cpplint_state.output_format in ['sed', 'gsed']: + if message in _SED_FIXUPS: + sys.stdout.write(_cpplint_state.output_format + " -i '%s%s' %s # %s [%s] [%d]\n" % ( + linenum, _SED_FIXUPS[message], filename, message, category, confidence)) + else: + sys.stderr.write('# %s:%s: "%s" [%s] [%d]\n' % ( + filename, linenum, message, category, confidence)) + else: + final_message = '%s:%s: %s [%s] [%d]\n' % ( + filename, linenum, message, category, confidence) + sys.stderr.write(final_message) + +# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. +_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( + r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') +# Match a single C style comment on the same line. +_RE_PATTERN_C_COMMENTS = r'/\*(?:[^*]|\*(?!/))*\*/' +# Matches multi-line C style comments. +# This RE is a little bit more complicated than one might expect, because we +# have to take care of space removals tools so we can handle comments inside +# statements better. +# The current rule is: We only clear spaces from both sides when we're at the +# end of the line. Otherwise, we try to remove spaces from the right side, +# if this doesn't work we try on left side but only if there's a non-character +# on the right. 
+_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( + r'(\s*' + _RE_PATTERN_C_COMMENTS + r'\s*$|' + + _RE_PATTERN_C_COMMENTS + r'\s+|' + + r'\s+' + _RE_PATTERN_C_COMMENTS + r'(?=\W)|' + + _RE_PATTERN_C_COMMENTS + r')') + +def IsCppString(line): + """Does line terminate so, that the next symbol is in string constant. + + This function does not consider single-line nor multi-line comments. + + Args: + line: is a partial line of code starting from the 0..n. + + Returns: + True, if next character appended to 'line' is inside a + string constant. + """ + + line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" + return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 + +def CleanseRawStrings(raw_lines): + """Removes C++11 raw strings from lines. + + Before: + static const char kData[] = R"( + multi-line string + )"; + + After: + static const char kData[] = "" + (replaced by blank line) + ""; + + Args: + raw_lines: list of raw lines. + + Returns: + list of lines with C++11 raw strings replaced by empty strings. + """ + + delimiter = None + lines_without_raw_strings = [] + for line in raw_lines: + if delimiter: + # Inside a raw string, look for the end + end = line.find(delimiter) + if end >= 0: + # Found the end of the string, match leading space for this + # line and resume copying the original lines, and also insert + # a "" on the last line. + leading_space = Match(r'^(\s*)\S', line) + line = leading_space.group(1) + '""' + line[end + len(delimiter):] + delimiter = None + else: + # Haven't found the end yet, append a blank line. + line = '""' + + # Look for beginning of a raw string, and replace them with + # empty strings. This is done in a loop to handle multiple raw + # strings on the same line. + while delimiter is None: + # Look for beginning of a raw string. + # See 2.14.15 [lex.string] for syntax. 
+ # + # Once we have matched a raw string, we check the prefix of the + # line to make sure that the line is not part of a single line + # comment. It's done this way because we remove raw strings + # before removing comments as opposed to removing comments + # before removing raw strings. This is because there are some + # cpplint checks that requires the comments to be preserved, but + # we don't want to check comments that are inside raw strings. + matched = Match(r'^(.*?)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) + if (matched and + not Match(r'^([^\'"]|\'(\\.|[^\'])*\'|"(\\.|[^"])*")*//', + matched.group(1))): + delimiter = ')' + matched.group(2) + '"' + + end = matched.group(3).find(delimiter) + if end >= 0: + # Raw string ended on same line + line = (matched.group(1) + '""' + + matched.group(3)[end + len(delimiter):]) + delimiter = None + else: + # Start of a multi-line raw string + line = matched.group(1) + '""' + else: + break + + lines_without_raw_strings.append(line) + + # TODO(unknown): if delimiter is not None here, we might want to + # emit a warning for unterminated string. + return lines_without_raw_strings + +def FindNextMultiLineCommentStart(lines, lineix): + """Find the beginning marker for a multiline comment.""" + while lineix < len(lines): + if lines[lineix].strip().startswith('/*'): + # Only return this marker if the comment goes beyond this line + if lines[lineix].strip().find('*/', 2) < 0: + return lineix + lineix += 1 + return len(lines) + +def FindNextMultiLineCommentEnd(lines, lineix): + """We are inside a comment, find the end marker.""" + while lineix < len(lines): + if lines[lineix].strip().endswith('*/'): + return lineix + lineix += 1 + return len(lines) + +def RemoveMultiLineCommentsFromRange(lines, begin, end): + """Clears a range of lines for multi-line comments.""" + # Having // dummy comments makes the lines non-empty, so we will not get + # unnecessary blank line warnings later in the code. 
+ for i in range(begin, end): + lines[i] = '/**/' + +def RemoveMultiLineComments(filename, lines, error): + """Removes multiline (c-style) comments from lines.""" + lineix = 0 + while lineix < len(lines): + lineix_begin = FindNextMultiLineCommentStart(lines, lineix) + if lineix_begin >= len(lines): + return + lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) + if lineix_end >= len(lines): + error(filename, lineix_begin + 1, 'readability/multiline_comment', 5, + 'Could not find end of multi-line comment') + return + RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) + lineix = lineix_end + 1 + +def CleanseComments(line): + """Removes //-comments and single-line C-style /* */ comments. + + Args: + line: A line of C++ source. + + Returns: + The line with single-line comments removed. + """ + commentpos = line.find('//') + if commentpos != -1 and not IsCppString(line[:commentpos]): + line = line[:commentpos].rstrip() + # get rid of /* ... */ + return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) + +class CleansedLines(object): + """Holds 4 copies of all lines with different preprocessing applied to them. + + 1) elided member contains lines without strings and comments. + 2) lines member contains lines without comments. + 3) raw_lines member contains all the lines without processing. + 4) lines_without_raw_strings member is same as raw_lines, but with C++11 raw + strings removed. + All these members are of , and of the same length. 
+ """ + + def __init__(self, lines): + self.elided = [] + self.lines = [] + self.raw_lines = lines + self.num_lines = len(lines) + self.lines_without_raw_strings = CleanseRawStrings(lines) + for linenum in range(len(self.lines_without_raw_strings)): + self.lines.append(CleanseComments( + self.lines_without_raw_strings[linenum])) + elided = self._CollapseStrings(self.lines_without_raw_strings[linenum]) + self.elided.append(CleanseComments(elided)) + + def NumLines(self): + """Returns the number of lines represented.""" + return self.num_lines + + @staticmethod + def _CollapseStrings(elided): + """Collapses strings and chars on a line to simple "" or '' blocks. + + We nix strings first so we're not fooled by text like '"http://"' + + Args: + elided: The line being processed. + + Returns: + The line with collapsed strings. + """ + if _RE_PATTERN_INCLUDE.match(elided): + return elided + + # Remove escaped characters first to make quote/single quote collapsing + # basic. Things that look like escaped characters shouldn't occur + # outside of strings and chars. + elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) + + # Replace quoted strings and digit separators. Both single quotes + # and double quotes are processed in the same loop, otherwise + # nested quotes wouldn't work. + collapsed = '' + while True: + # Find the first quote character + match = Match(r'^([^\'"]*)([\'"])(.*)$', elided) + if not match: + collapsed += elided + break + head, quote, tail = match.groups() + + if quote == '"': + # Collapse double quoted strings + second_quote = tail.find('"') + if second_quote >= 0: + collapsed += head + '""' + elided = tail[second_quote + 1:] + else: + # Unmatched double quote, don't bother processing the rest + # of the line since this is probably a multiline string. + collapsed += elided + break + else: + # Found single quote, check nearby text to eliminate digit separators. 
+ # + # There is no special handling for floating point here, because + # the integer/fractional/exponent parts would all be parsed + # correctly as long as there are digits on both sides of the + # separator. So we are fine as long as we don't see something + # like "0.'3" (gcc 4.9.0 will not allow this literal). + if Search(r'\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$', head): + match_literal = Match(r'^((?:\'?[0-9a-zA-Z_])*)(.*)$', "'" + tail) + collapsed += head + match_literal.group(1).replace("'", '') + elided = match_literal.group(2) + else: + second_quote = tail.find('\'') + if second_quote >= 0: + collapsed += head + "''" + elided = tail[second_quote + 1:] + else: + # Unmatched single quote + collapsed += elided + break + + return collapsed + +def FindEndOfExpressionInLine(line, startpos, stack): + """Find the position just after the end of current parenthesized expression. + + Args: + line: a CleansedLines line. + startpos: start searching at this position. + stack: nesting stack at startpos. + + Returns: + On finding matching end: (index just after matching end, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at end of this line) + """ + for i in xrange(startpos, len(line)): + char = line[i] + if char in '([{': + # Found start of parenthesized expression, push to expression stack + stack.append(char) + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + if stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + elif i > 0 and Search(r'\boperator\s*$', line[0:i]): + # operator<, don't add to stack + continue + else: + # Tentative start of template argument list + stack.append('<') + elif char in ')]}': + # Found end of parenthesized expression. + # + # If we are currently expecting a matching '>', the pending '<' + # must have been an operator. Remove them from expression stack. 
+ while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + if ((stack[-1] == '(' and char == ')') or + (stack[-1] == '[' and char == ']') or + (stack[-1] == '{' and char == '}')): + stack.pop() + if not stack: + return (i + 1, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == '>': + # Found potential end of template argument list. + + # Ignore "->" and operator functions + if (i > 0 and + (line[i - 1] == '-' or Search(r'\boperator\s*$', line[0:i - 1]))): + continue + + # Pop the stack if there is a matching '<'. Otherwise, ignore + # this '>' since it must be an operator. + if stack: + if stack[-1] == '<': + stack.pop() + if not stack: + return (i + 1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '>', the matching '<' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '<': + stack.pop() + if not stack: + return (-1, None) + + # Did not find end of expression or unbalanced parentheses on this line + return (-1, stack) + +def CloseExpression(clean_lines, linenum, pos): + """If input points to ( or { or [ or <, finds the position that closes it. + + If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the + linenum/pos that correspond to the closing of the expression. + + TODO(unknown): cpplint spends a fair bit of time matching parentheses. + Ideally we would want to index all opening and closing parentheses once + and have CloseExpression be just a simple lookup, but due to preprocessor + tricks, this is not so easy. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *past* the closing brace, or + (line, len(lines), -1) if we never find a close. 
Note we ignore + strings and comments when matching; and the line we return is the + 'cleansed' line at linenum. + """ + + line = clean_lines.elided[linenum] + if (line[pos] not in '({[<') or Match(r'<[<=]', line[pos:]): + return (line, clean_lines.NumLines(), -1) + + # Check first line + (end_pos, stack) = FindEndOfExpressionInLine(line, pos, []) + if end_pos > -1: + return (line, linenum, end_pos) + + # Continue scanning forward + while stack and linenum < clean_lines.NumLines() - 1: + linenum += 1 + line = clean_lines.elided[linenum] + (end_pos, stack) = FindEndOfExpressionInLine(line, 0, stack) + if end_pos > -1: + return (line, linenum, end_pos) + + # Did not find end of expression before end of file, give up + return (line, clean_lines.NumLines(), -1) + +def FindStartOfExpressionInLine(line, endpos, stack): + """Find position at the matching start of current expression. + + This is almost the reverse of FindEndOfExpressionInLine, but note + that the input position and returned position differs by 1. + + Args: + line: a CleansedLines line. + endpos: start searching at this position. + stack: nesting stack at endpos. + + Returns: + On finding matching start: (index at matching start, None) + On finding an unclosed expression: (-1, None) + Otherwise: (-1, new stack at beginning of this line) + """ + i = endpos + while i >= 0: + char = line[i] + if char in ')]}': + # Found end of expression, push to expression stack + stack.append(char) + elif char == '>': + # Found potential end of template argument list. + # + # Ignore it if it's a "->" or ">=" or "operator>" + if (i > 0 and + (line[i - 1] == '-' or + Match(r'\s>=\s', line[i - 1:]) or + Search(r'\boperator\s*$', line[0:i]))): + i -= 1 + else: + stack.append('>') + elif char == '<': + # Found potential start of template argument list + if i > 0 and line[i - 1] == '<': + # Left shift operator + i -= 1 + else: + # If there is a matching '>', we can pop the expression stack. 
+ # Otherwise, ignore this '<' since it must be an operator. + if stack and stack[-1] == '>': + stack.pop() + if not stack: + return (i, None) + elif char in '([{': + # Found start of expression. + # + # If there are any unmatched '>' on the stack, they must be + # operators. Remove those. + while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + if ((char == '(' and stack[-1] == ')') or + (char == '[' and stack[-1] == ']') or + (char == '{' and stack[-1] == '}')): + stack.pop() + if not stack: + return (i, None) + else: + # Mismatched parentheses + return (-1, None) + elif char == ';': + # Found something that look like end of statements. If we are currently + # expecting a '<', the matching '>' must have been an operator, since + # template argument list should not contain statements. + while stack and stack[-1] == '>': + stack.pop() + if not stack: + return (-1, None) + + i -= 1 + + return (-1, stack) + +def ReverseCloseExpression(clean_lines, linenum, pos): + """If input points to ) or } or ] or >, finds the position that opens it. + + If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the + linenum/pos that correspond to the opening of the expression. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + pos: A position on the line. + + Returns: + A tuple (line, linenum, pos) pointer *at* the opening brace, or + (line, 0, -1) if we never find the matching opening brace. Note + we ignore strings and comments when matching; and the line we + return is the 'cleansed' line at linenum. 
+ """ + line = clean_lines.elided[linenum] + if line[pos] not in ')}]>': + return (line, 0, -1) + + # Check last line + (start_pos, stack) = FindStartOfExpressionInLine(line, pos, []) + if start_pos > -1: + return (line, linenum, start_pos) + + # Continue scanning backward + while stack and linenum > 0: + linenum -= 1 + line = clean_lines.elided[linenum] + (start_pos, stack) = FindStartOfExpressionInLine(line, len(line) - 1, stack) + if start_pos > -1: + return (line, linenum, start_pos) + + # Did not find start of expression before beginning of file, give up + return (line, 0, -1) + +def CheckForCopyright(filename, lines, error): + """Logs an error if no Copyright message appears at the top of the file.""" + + # We'll say it should occur by line 10. Don't forget there's a + # dummy line at the front. + for line in xrange(1, min(len(lines), 11)): + if re.search(r'Copyright', lines[line], re.I): break + else: # means no copyright line was found + error(filename, 0, 'legal/copyright', 5, + 'No copyright message found. ' + 'You should have a line: "Copyright [year] "') + +def GetIndentLevel(line): + """Return the number of leading spaces in line. + + Args: + line: A string to check. + + Returns: + An integer count of leading spaces, possibly zero. + """ + indent = Match(r'^( *)\S', line) + if indent: + return len(indent.group(1)) + else: + return 0 + +def PathSplitToList(path): + """Returns the path split into a list by the separator. + + Args: + path: An absolute or relative path (e.g. '/a/b/c/' or '../a') + + Returns: + A list of path components (e.g. ['a', 'b', 'c]). + """ + lst = [] + while True: + (head, tail) = os.path.split(path) + if head == path: # absolute paths end + lst.append(head) + break + if tail == path: # relative paths end + lst.append(tail) + break + + path = head + lst.append(tail) + + lst.reverse() + return lst + +def GetHeaderGuardCPPVariable(filename): + """Returns the CPP variable that should be used as a header guard. 
+ + Args: + filename: The name of a C++ header file. + + Returns: + The CPP variable that should be used as a header guard in the + named file. + + """ + + # Restores original filename in case that cpplint is invoked from Emacs's + # flymake. + filename = re.sub(r'_flymake\.h$', '.h', filename) + filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) + # Replace 'c++' with 'cpp'. + filename = filename.replace('C++', 'cpp').replace('c++', 'cpp') + + fileinfo = FileInfo(filename) + file_path_from_root = fileinfo.RepositoryName() + + def FixupPathFromRoot(): + if _root_debug: + sys.stderr.write("\n_root fixup, _root = '%s', repository name = '%s'\n" + % (_root, fileinfo.RepositoryName())) + + # Process the file path with the --root flag if it was set. + if not _root: + if _root_debug: + sys.stderr.write("_root unspecified\n") + return file_path_from_root + + def StripListPrefix(lst, prefix): + # f(['x', 'y'], ['w, z']) -> None (not a valid prefix) + if lst[:len(prefix)] != prefix: + return None + # f(['a, 'b', 'c', 'd'], ['a', 'b']) -> ['c', 'd'] + return lst[(len(prefix)):] + + # root behavior: + # --root=subdir , lstrips subdir from the header guard + maybe_path = StripListPrefix(PathSplitToList(file_path_from_root), + PathSplitToList(_root)) + + if _root_debug: + sys.stderr.write(("_root lstrip (maybe_path=%s, file_path_from_root=%s," + + " _root=%s)\n") % (maybe_path, file_path_from_root, _root)) + + if maybe_path: + return os.path.join(*maybe_path) + + # --root=.. 
, will prepend the outer directory to the header guard + full_path = fileinfo.FullName() + root_abspath = os.path.abspath(_root) + + maybe_path = StripListPrefix(PathSplitToList(full_path), + PathSplitToList(root_abspath)) + + if _root_debug: + sys.stderr.write(("_root prepend (maybe_path=%s, full_path=%s, " + + "root_abspath=%s)\n") % (maybe_path, full_path, root_abspath)) + + if maybe_path: + return os.path.join(*maybe_path) + + if _root_debug: + sys.stderr.write("_root ignore, returning %s\n" % (file_path_from_root)) + + # --root=FAKE_DIR is ignored + return file_path_from_root + + file_path_from_root = FixupPathFromRoot() + return re.sub(r'[^a-zA-Z0-9]', '_', file_path_from_root).upper() + '_' + +def CheckForHeaderGuard(filename, clean_lines, error): + """Checks that the file contains a header guard. + + Logs an error if no #ifndef header guard is present. For other + headers, checks that the full pathname is used. + + Args: + filename: The name of the C++ header file. + clean_lines: A CleansedLines instance containing the file. + error: The function to call with any errors found. + """ + + # Don't check for header guards if there are error suppression + # comments somewhere in this file. + # + # Because this is silencing a warning for a nonexistent line, we + # only support the very specific NOLINT(build/header_guard) syntax, + # and not the general NOLINT or NOLINT(*) syntax. 
+ raw_lines = clean_lines.lines_without_raw_strings + for i in raw_lines: + if Search(r'//\s*NOLINT\(build/header_guard\)', i): + return + + # Allow pragma once instead of header guards + for i in raw_lines: + if Search(r'^\s*#pragma\s+once', i): + return + + cppvar = GetHeaderGuardCPPVariable(filename) + + ifndef = '' + ifndef_linenum = 0 + define = '' + endif = '' + endif_linenum = 0 + for linenum, line in enumerate(raw_lines): + linesplit = line.split() + if len(linesplit) >= 2: + # find the first occurrence of #ifndef and #define, save arg + if not ifndef and linesplit[0] == '#ifndef': + # set ifndef to the header guard presented on the #ifndef line. + ifndef = linesplit[1] + ifndef_linenum = linenum + if not define and linesplit[0] == '#define': + define = linesplit[1] + # find the last occurrence of #endif, save entire line + if line.startswith('#endif'): + endif = line + endif_linenum = linenum + + if not ifndef or not define or ifndef != define: + error(filename, 0, 'build/header_guard', 5, + 'No #ifndef header guard found, suggested CPP variable is: %s' % + cppvar) + return + + # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ + # for backward compatibility. + if ifndef != cppvar: + error_level = 0 + if ifndef != cppvar + '_': + error_level = 5 + + ParseNolintSuppressions(filename, raw_lines[ifndef_linenum], ifndef_linenum, + error) + error(filename, ifndef_linenum, 'build/header_guard', error_level, + '#ifndef header guard has wrong style, please use: %s' % cppvar) + + # Check for "//" comments on endif line. + ParseNolintSuppressions(filename, raw_lines[endif_linenum], endif_linenum, + error) + match = Match(r'#endif\s*//\s*' + cppvar + r'(_)?\b', endif) + if match: + if match.group(1) == '_': + # Issue low severity warning for deprecated double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif // %s"' % cppvar) + return + + # Didn't find the corresponding "//" comment. 
If this file does not + # contain any "//" comments at all, it could be that the compiler + # only wants "/**/" comments, look for those instead. + no_single_line_comments = True + for i in xrange(1, len(raw_lines) - 1): + line = raw_lines[i] + if Match(r'^(?:(?:\'(?:\.|[^\'])*\')|(?:"(?:\.|[^"])*")|[^\'"])*//', line): + no_single_line_comments = False + break + + if no_single_line_comments: + match = Match(r'#endif\s*/\*\s*' + cppvar + r'(_)?\s*\*/', endif) + if match: + if match.group(1) == '_': + # Low severity warning for double trailing underscore + error(filename, endif_linenum, 'build/header_guard', 0, + '#endif line should be "#endif /* %s */"' % cppvar) + return + + # Didn't find anything + error(filename, endif_linenum, 'build/header_guard', 5, + '#endif line should be "#endif // %s"' % cppvar) + +def CheckHeaderFileIncluded(filename, include_state, error): + """Logs an error if a source file does not include its header.""" + + # Do not check test files + fileinfo = FileInfo(filename) + if Search(_TEST_FILE_SUFFIX, fileinfo.BaseName()): + return + + for ext in GetHeaderExtensions(): + basefilename = filename[0:len(filename) - len(fileinfo.Extension())] + headerfile = basefilename + '.' + ext + if not os.path.exists(headerfile): + continue + headername = FileInfo(headerfile).RepositoryName() + first_include = None + include_uses_unix_dir_aliases = False + for section_list in include_state.include_list: + for f in section_list: + include_text = f[0] + if "./" in include_text: + include_uses_unix_dir_aliases = True + if headername in include_text or include_text in headername: + return + if not first_include: + first_include = f[1] + + message = '%s should include its header file %s' % (fileinfo.RepositoryName(), headername) + if include_uses_unix_dir_aliases: + message += ". Relative paths like . and .. are not allowed." 
+ + error(filename, first_include, 'build/include', 5, message) + +def CheckForBadCharacters(filename, lines, error): + """Logs an error for each line containing bad characters. + + Two kinds of bad characters: + + 1. Unicode replacement characters: These indicate that either the file + contained invalid UTF-8 (likely) or Unicode replacement characters (which + it shouldn't). Note that it's possible for this to throw off line + numbering if the invalid UTF-8 occurred adjacent to a newline. + + 2. NUL bytes. These are problematic for some tools. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + for linenum, line in enumerate(lines): + if unicode_escape_decode('\ufffd') in line: + error(filename, linenum, 'readability/utf8', 5, + 'Line contains invalid UTF-8 (or Unicode replacement character).') + if '\0' in line: + error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') + +def CheckForNewlineAtEOF(filename, lines, error): + """Logs an error if there is no newline char at the end of the file. + + Args: + filename: The name of the current file. + lines: An array of strings, each representing a line of the file. + error: The function to call with any errors found. + """ + + # The array lines() was created by adding two newlines to the + # original file (go figure), then splitting on \n. + # To verify that the file ends in \n, we just have to make sure the + # last-but-two element of lines() exists and is empty. + if len(lines) < 3 or lines[-2]: + error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, + 'Could not find a newline character at the end of the file.') + +def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): + """Logs an error if we see /* ... */ or "..." that extend past one line. + + /* ... */ comments are legit inside macros, for one line. 
+ Otherwise, we prefer // comments, so it's ok to warn about the + other. Likewise, it's ok for strings to extend across multiple + lines, as long as a line continuation character (backslash) + terminates each line. Although not currently prohibited by the C++ + style guide, it's ugly and unnecessary. We don't do well with either + in this lint program, so we warn about both. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Remove all \\ (escaped backslashes) from the line. They are OK, and the + # second (escaped) slash may trigger later \" detection erroneously. + line = line.replace('\\\\', '') + + if line.count('/*') > line.count('*/'): + error(filename, linenum, 'readability/multiline_comment', 5, + 'Complex multi-line /*...*/-style comment found. ' + 'Lint may give bogus warnings. ' + 'Consider replacing these with //-style comments, ' + 'with #if 0...#endif, ' + 'or with more clearly structured multi-line comments.') + + if (line.count('"') - line.count('\\"')) % 2: + error(filename, linenum, 'readability/multiline_string', 5, + 'Multi-line string ("...") found. This lint script doesn\'t ' + 'do well with such strings, and may give bogus warnings. ' + 'Use C++11 raw strings or concatenation instead.') + +# (non-threadsafe name, thread-safe alternative, validation pattern) +# +# The validation pattern is used to eliminate false positives such as: +# _rand(); // false positive due to substring match. +# ->rand(); // some member function rand(). +# ACMRandom rand(seed); // some variable named rand. +# ISAACRandom rand(); // another variable named rand. +# +# Basically we require the return value of these functions to be used +# in some expression context on the same line by matching on some +# operator before the function name. 
This eliminates constructors and +# member function calls. +_UNSAFE_FUNC_PREFIX = r'(?:[-+*/=%^&|(<]\s*|>\s+)' +_THREADING_LIST = ( + ('asctime(', 'asctime_r(', _UNSAFE_FUNC_PREFIX + r'asctime\([^)]+\)'), + ('ctime(', 'ctime_r(', _UNSAFE_FUNC_PREFIX + r'ctime\([^)]+\)'), + ('getgrgid(', 'getgrgid_r(', _UNSAFE_FUNC_PREFIX + r'getgrgid\([^)]+\)'), + ('getgrnam(', 'getgrnam_r(', _UNSAFE_FUNC_PREFIX + r'getgrnam\([^)]+\)'), + ('getlogin(', 'getlogin_r(', _UNSAFE_FUNC_PREFIX + r'getlogin\(\)'), + ('getpwnam(', 'getpwnam_r(', _UNSAFE_FUNC_PREFIX + r'getpwnam\([^)]+\)'), + ('getpwuid(', 'getpwuid_r(', _UNSAFE_FUNC_PREFIX + r'getpwuid\([^)]+\)'), + ('gmtime(', 'gmtime_r(', _UNSAFE_FUNC_PREFIX + r'gmtime\([^)]+\)'), + ('localtime(', 'localtime_r(', _UNSAFE_FUNC_PREFIX + r'localtime\([^)]+\)'), + ('rand(', 'rand_r(', _UNSAFE_FUNC_PREFIX + r'rand\(\)'), + ('strtok(', 'strtok_r(', + _UNSAFE_FUNC_PREFIX + r'strtok\([^)]+\)'), + ('ttyname(', 'ttyname_r(', _UNSAFE_FUNC_PREFIX + r'ttyname\([^)]+\)'), + ) + +def CheckPosixThreading(filename, clean_lines, linenum, error): + """Checks for calls to thread-unsafe functions. + + Much code has been originally written without consideration of + multi-threading. Also, engineers are relying on their old experience; + they have learned posix before threading extensions were added. These + tests guide the engineers to use thread-safe functions (when using + posix directly). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + for single_thread_func, multithread_safe_func, pattern in _THREADING_LIST: + # Additional pattern matching check to confirm that this is the + # function we are looking for + if Search(pattern, line): + error(filename, linenum, 'runtime/threadsafe_fn', 2, + 'Consider using ' + multithread_safe_func + + '...) 
instead of ' + single_thread_func + + '...) for improved thread safety.') + +def CheckVlogArguments(filename, clean_lines, linenum, error): + """Checks that VLOG() is only used for defining a logging level. + + For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and + VLOG(FATAL) are not. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): + error(filename, linenum, 'runtime/vlog', 5, + 'VLOG() should be used with numeric verbosity level. ' + 'Use LOG() if you want symbolic severity levels.') + +# Matches invalid increment: *count++, which moves pointer instead of +# incrementing a value. +_RE_PATTERN_INVALID_INCREMENT = re.compile( + r'^\s*\*\w+(\+\+|--);') + +def CheckInvalidIncrement(filename, clean_lines, linenum, error): + """Checks for invalid increment *count++. + + For example following function: + void increment_counter(int* count) { + *count++; + } + is invalid, because it effectively does count++, moving pointer, and should + be replaced with ++*count, (*count)++ or *count += 1. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + if _RE_PATTERN_INVALID_INCREMENT.match(line): + error(filename, linenum, 'runtime/invalid_increment', 5, + 'Changing pointer instead of value (or unused value of operator*).') + +def IsMacroDefinition(clean_lines, linenum): + if Search(r'^#define', clean_lines[linenum]): + return True + + if linenum > 0 and Search(r'\\$', clean_lines[linenum - 1]): + return True + + return False + +def IsForwardClassDeclaration(clean_lines, linenum): + return Match(r'^\s*(\btemplate\b)*.*class\s+\w+;\s*$', clean_lines[linenum]) + +class _BlockInfo(object): + """Stores information about a generic block of code.""" + + def __init__(self, linenum, seen_open_brace): + self.starting_linenum = linenum + self.seen_open_brace = seen_open_brace + self.open_parentheses = 0 + self.inline_asm = _NO_ASM + self.check_namespace_indentation = False + + def CheckBegin(self, filename, clean_lines, linenum, error): + """Run checks that applies to text up to the opening brace. + + This is mostly for checking the text after the class identifier + and the "{", usually where the base class is specified. For other + blocks, there isn't much to check, so we always pass. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Run checks that applies to text after the closing brace. + + This is mostly used for checking end of namespace comments. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + pass + + def IsBlockInfo(self): + """Returns true if this block is a _BlockInfo. 
+ + This is convenient for verifying that an object is an instance of + a _BlockInfo, but not an instance of any of the derived classes. + + Returns: + True for this class, False for derived classes. + """ + return self.__class__ == _BlockInfo + +class _ExternCInfo(_BlockInfo): + """Stores information about an 'extern "C"' block.""" + + def __init__(self, linenum): + _BlockInfo.__init__(self, linenum, True) + +class _ClassInfo(_BlockInfo): + """Stores information about a class.""" + + def __init__(self, name, class_or_struct, clean_lines, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name + self.is_derived = False + self.check_namespace_indentation = True + if class_or_struct == 'struct': + self.access = 'public' + self.is_struct = True + else: + self.access = 'private' + self.is_struct = False + + # Remember initial indentation level for this class. Using raw_lines here + # instead of elided to account for leading comments. + self.class_indent = GetIndentLevel(clean_lines.raw_lines[linenum]) + + # Try to find the end of the class. This will be confused by things like: + # class A { + # } *x = { ... + # + # But it's still good enough for CheckSectionSpacing. + self.last_line = 0 + depth = 0 + for i in range(linenum, clean_lines.NumLines()): + line = clean_lines.elided[i] + depth += line.count('{') - line.count('}') + if not depth: + self.last_line = i + break + + def CheckBegin(self, filename, clean_lines, linenum, error): + # Look for a bare ':' + if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): + self.is_derived = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + # If there is a DISALLOW macro, it should appear near the end of + # the class. 
+ seen_last_thing_in_class = False + for i in xrange(linenum - 1, self.starting_linenum, -1): + match = Search( + r'\b(DISALLOW_COPY_AND_ASSIGN|DISALLOW_IMPLICIT_CONSTRUCTORS)\(' + + self.name + r'\)', + clean_lines.elided[i]) + if match: + if seen_last_thing_in_class: + error(filename, i, 'readability/constructors', 3, + match.group(1) + ' should be the last thing in the class') + break + + if not Match(r'^\s*$', clean_lines.elided[i]): + seen_last_thing_in_class = True + + # Check that closing brace is aligned with beginning of the class. + # Only do this if the closing brace is indented by only whitespaces. + # This means we will not check single-line class definitions. + indent = Match(r'^( *)\}', clean_lines.elided[linenum]) + if indent and len(indent.group(1)) != self.class_indent: + if self.is_struct: + parent = 'struct ' + self.name + else: + parent = 'class ' + self.name + error(filename, linenum, 'whitespace/indent', 3, + 'Closing brace should be aligned with beginning of %s' % parent) + +class _NamespaceInfo(_BlockInfo): + """Stores information about a namespace.""" + + def __init__(self, name, linenum): + _BlockInfo.__init__(self, linenum, False) + self.name = name or '' + self.check_namespace_indentation = True + + def CheckEnd(self, filename, clean_lines, linenum, error): + """Check end of namespace comments.""" + line = clean_lines.raw_lines[linenum] + + # Check how many lines is enclosed in this namespace. Don't issue + # warning for missing namespace comments if there aren't enough + # lines. However, do apply checks if there is already an end of + # namespace comment and it's incorrect. + # + # TODO(unknown): We always want to check end of namespace comments + # if a namespace is large, but sometimes we also want to apply the + # check if a short namespace contained nontrivial things (something + # other than forward declarations). 
There is currently no logic on + # deciding what these nontrivial things are, so this check is + # triggered by namespace size only, which works most of the time. + if (linenum - self.starting_linenum < 10 + and not Match(r'^\s*};*\s*(//|/\*).*\bnamespace\b', line)): + return + + # Look for matching comment at end of namespace. + # + # Note that we accept C style "/* */" comments for terminating + # namespaces, so that code that terminate namespaces inside + # preprocessor macros can be cpplint clean. + # + # We also accept stuff like "// end of namespace ." with the + # period at the end. + # + # Besides these, we don't accept anything else, otherwise we might + # get false negatives when existing comment is a substring of the + # expected namespace. + if self.name: + # Named namespace + if not Match((r'^\s*};*\s*(//|/\*).*\bnamespace\s+' + + re.escape(self.name) + r'[\*/\.\\\s]*$'), + line): + error(filename, linenum, 'readability/namespace', 5, + 'Namespace should be terminated with "// namespace %s"' % + self.name) + else: + # Anonymous namespace + if not Match(r'^\s*};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): + # If "// namespace anonymous" or "// anonymous namespace (more text)", + # mention "// anonymous namespace" as an acceptable form + if Match(r'^\s*}.*\b(namespace anonymous|anonymous namespace)\b', line): + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"' + ' or "// anonymous namespace"') + else: + error(filename, linenum, 'readability/namespace', 5, + 'Anonymous namespace should be terminated with "// namespace"') + +class _PreprocessorInfo(object): + """Stores checkpoints of nesting stacks when #if/#else is seen.""" + + def __init__(self, stack_before_if): + # The entire nesting stack before #if + self.stack_before_if = stack_before_if + + # The entire nesting stack up to #else + self.stack_before_else = [] + + # Whether we have already seen #else or #elif + self.seen_else = 
class NestingState(object):
  """Holds states related to parsing braces.

  Attributes:
    stack: Blocks that are currently open, outermost first. Update() pushes
      an entry for every namespace/class head or "{" it sees and pops one
      for every "}".
    previous_stack_top: The innermost entry of `stack` as it was before the
      most recent Update(), or None if the stack was empty at that point.
    pp_stack: One _PreprocessorInfo per #if/#ifdef/#ifndef that has not yet
      been closed by an #endif.
  """

  def __init__(self):
    # Stack for tracking all braces.  An object is pushed whenever we
    # see a "{", and popped when we see a "}".  Only 3 types of
    # objects are possible:
    # - _ClassInfo: a class or struct.
    # - _NamespaceInfo: a namespace.
    # - _BlockInfo: some other type of block.
    self.stack = []

    # Top of the previous stack before each Update().
    #
    # Because the nesting_stack is updated at the end of each line, we
    # had to do some convoluted checks to find out what is the current
    # scope at the beginning of the line.  This check is simplified by
    # saving the previous top of nesting stack.
    #
    # We could save the full stack, but we only need the top.  Copying
    # the full nesting stack would slow down cpplint by ~10%.
    #
    # Starts out as None, which is the same value Update() stores when the
    # stack is empty, so readers only ever see "a block object or None".
    self.previous_stack_top = None

    # Stack of _PreprocessorInfo objects.
    self.pp_stack = []

  def SeenOpenBrace(self):
    """Check if we have seen the opening brace for the innermost block.

    Returns:
      True if we have seen the opening brace, False if the innermost
      block is still expecting an opening brace.  An empty stack counts
      as "seen".
    """
    return (not self.stack) or self.stack[-1].seen_open_brace

  def InNamespaceBody(self):
    """Check if we are currently one level inside a namespace body.

    Returns:
      True if top of the stack is a namespace block, False otherwise.
    """
    # bool() so that an empty stack yields False rather than the list itself.
    return bool(self.stack) and isinstance(self.stack[-1], _NamespaceInfo)

  def InExternC(self):
    """Check if we are currently one level inside an 'extern "C"' block.

    Returns:
      True if top of the stack is an extern block, False otherwise.
    """
    return bool(self.stack) and isinstance(self.stack[-1], _ExternCInfo)

  def InClassDeclaration(self):
    """Check if we are currently one level inside a class or struct declaration.

    Returns:
      True if top of the stack is a class/struct, False otherwise.
    """
    return bool(self.stack) and isinstance(self.stack[-1], _ClassInfo)

  def InAsmBlock(self):
    """Check if we are currently one level inside an inline ASM block.

    Returns:
      True if the top of the stack is a block containing inline ASM,
      False otherwise.
    """
    return bool(self.stack) and self.stack[-1].inline_asm != _NO_ASM

  def InTemplateArgumentList(self, clean_lines, linenum, pos):
    """Check if current position is inside template argument list.

    Scans forward from (linenum, pos), possibly across lines, until a
    character is found that settles the question.

    Args:
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      pos: position just after the suspected template argument.
    Returns:
      True if (linenum, pos) is inside template arguments.
    """
    while linenum < clean_lines.NumLines():
      # Find the earliest character that might indicate a template argument
      line = clean_lines.elided[linenum]
      match = Match(r'^[^{};=\[\]\.<>]*(.)', line[pos:])
      if not match:
        linenum += 1
        pos = 0
        continue
      token = match.group(1)
      pos += len(match.group(0))

      # A brace or semicolon first means this was an ordinary declaration,
      # not a template argument list.
      if token in ('{', '}', ';'):
        return False

      # An unmatched '>', a default value ('='), or one of '[', ']', '.'
      # first means we are still inside a template argument list.
      if token in ('>', '=', '[', ']', '.'):
        return True

      # Check if token is an unmatched '<'.
      # If not, move on to the next character.
      if token != '<':
        pos += 1
        if pos >= len(line):
          linenum += 1
          pos = 0
        continue

      # We can't be sure if we just find a single '<', and need to
      # find the matching '>'.
      (_, end_line, end_pos) = CloseExpression(clean_lines, linenum, pos - 1)
      if end_pos < 0:
        # Not sure if template argument list or syntax error in file
        return False
      linenum = end_line
      pos = end_pos
    return False

  def UpdatePreprocessor(self, line):
    """Update preprocessor stack.

    We need to handle preprocessors due to classes like this:
      #ifdef SWIG
      struct ResultDetailsPageElementExtensionPoint {
      #else
      struct ResultDetailsPageElementExtensionPoint : public Extension {
      #endif

    We make the following assumptions (good enough for most files):
    - Preprocessor condition evaluates to true from #if up to first
      #else/#elif/#endif.

    - Preprocessor condition evaluates to false from #else/#elif up
      to #endif.  We still perform lint checks on these lines, but
      these do not affect nesting stack.

    Args:
      line: current line to check.
    """
    if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line):
      # Beginning of #if block, save the nesting stack here.  The saved
      # stack will allow us to restore the parsing state in the #else case.
      self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack)))
    elif Match(r'^\s*#\s*(else|elif)\b', line):
      # Beginning of #else block
      if self.pp_stack:
        if not self.pp_stack[-1].seen_else:
          # This is the first #else or #elif block.  Remember the
          # whole nesting stack up to this point.  This is what we
          # keep after the #endif.
          self.pp_stack[-1].seen_else = True
          self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack)

        # Restore the stack to how it was before the #if
        self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if)
      else:
        # TODO(unknown): unexpected #else, issue warning?
        pass
    elif Match(r'^\s*#\s*endif\b', line):
      # End of #if or #else blocks.
      if self.pp_stack:
        # If we saw an #else, we will need to restore the nesting
        # stack to its former state before the #else, otherwise we
        # will just continue from where we left off.
        if self.pp_stack[-1].seen_else:
          # Here we can just use a shallow copy since we are the last
          # reference to it.
          self.stack = self.pp_stack[-1].stack_before_else
        # Drop the corresponding #if
        self.pp_stack.pop()
      else:
        # TODO(unknown): unexpected #endif, issue warning?
        pass

  # TODO(unknown): Update() is too long, but we will refactor later.
  def Update(self, filename, clean_lines, linenum, error):
    """Update nesting state with current line.

    Args:
      filename: The name of the current file.
      clean_lines: A CleansedLines instance containing the file.
      linenum: The number of the line to check.
      error: The function to call with any errors found.
    """
    line = clean_lines.elided[linenum]

    # Remember top of the previous nesting stack.
    #
    # The stack is always pushed/popped and not modified in place, so
    # we can just do a shallow copy instead of copy.deepcopy.  Using
    # deepcopy would slow down cpplint by ~28%.
    if self.stack:
      self.previous_stack_top = self.stack[-1]
    else:
      self.previous_stack_top = None

    # Update pp_stack
    self.UpdatePreprocessor(line)

    # Count parentheses.  This is to avoid adding struct arguments to
    # the nesting stack.
    if self.stack:
      inner_block = self.stack[-1]
      depth_change = line.count('(') - line.count(')')
      inner_block.open_parentheses += depth_change

      # Also check if we are starting or ending an inline assembly block.
      if inner_block.inline_asm in (_NO_ASM, _END_ASM):
        if (depth_change != 0 and
            inner_block.open_parentheses == 1 and
            _MATCH_ASM.match(line)):
          # Enter assembly block
          inner_block.inline_asm = _INSIDE_ASM
        else:
          # Not entering assembly block.  If previous line was _END_ASM,
          # we will now shift to _NO_ASM state.
          inner_block.inline_asm = _NO_ASM
      elif (inner_block.inline_asm == _INSIDE_ASM and
            inner_block.open_parentheses == 0):
        # Exit assembly block
        inner_block.inline_asm = _END_ASM

    # Consume namespace declaration at the beginning of the line.  Do
    # this in a loop so that we catch same line declarations like this:
    #   namespace proto2 { namespace bridge { class MessageSet; } }
    while True:
      # Match start of namespace.  The "\b\s*" below catches namespace
      # declarations even if it weren't followed by a whitespace, this
      # is so that we don't confuse our namespace checker.  The
      # missing spaces will be flagged by CheckSpacing.
      namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line)
      if not namespace_decl_match:
        break

      new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum)
      self.stack.append(new_namespace)

      line = namespace_decl_match.group(2)
      if line.find('{') != -1:
        new_namespace.seen_open_brace = True
        line = line[line.find('{') + 1:]

    # Look for a class declaration in whatever is left of the line
    # after parsing namespaces.  The regexp accounts for decorated classes
    # such as in:
    #   class LOCKABLE API Object {
    #   };
    class_decl_match = Match(
        r'^(\s*(?:template\s*<[\w\s<>,:=]*>\s*)?'
        r'(class|struct)\s+(?:[a-zA-Z0-9_]+\s+)*(\w+(?:::\w+)*))'
        r'(.*)$', line)
    if (class_decl_match and
        (not self.stack or self.stack[-1].open_parentheses == 0)):
      # We do not want to accept "class"/"struct" keywords that are really
      # template parameters of a following declaration.
      #
      # To avoid template argument cases, we scan forward and look for
      # an unmatched '>'.  If we see one, assume we are inside a
      # template argument list.
      end_declaration = len(class_decl_match.group(1))
      if not self.InTemplateArgumentList(clean_lines, linenum, end_declaration):
        self.stack.append(_ClassInfo(
            class_decl_match.group(3), class_decl_match.group(2),
            clean_lines, linenum))
        line = class_decl_match.group(4)

    # If we have not yet seen the opening brace for the innermost block,
    # run checks here.
    if not self.SeenOpenBrace():
      self.stack[-1].CheckBegin(filename, clean_lines, linenum, error)

    # Update access control if we are inside a class/struct
    if self.stack and isinstance(self.stack[-1], _ClassInfo):
      classinfo = self.stack[-1]
      access_match = Match(
          r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?'
          r':(?:[^:]|$)',
          line)
      if access_match:
        classinfo.access = access_match.group(2)

        # Check that access keywords are indented +1 space.  Skip this
        # check if the keywords are not preceded by whitespaces.
        indent = access_match.group(1)
        if (len(indent) != classinfo.class_indent + 1 and
            Match(r'^\s*$', indent)):
          if classinfo.is_struct:
            parent = 'struct ' + classinfo.name
          else:
            parent = 'class ' + classinfo.name
          slots = ''
          if access_match.group(3):
            slots = access_match.group(3)
          error(filename, linenum, 'whitespace/indent', 3,
                '%s%s: should be indented +1 space inside %s' % (
                    access_match.group(2), slots, parent))

    # Consume braces or semicolons from what's left of the line
    while True:
      # Match first brace, semicolon, or closed parenthesis.
      matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line)
      if not matched:
        break

      token = matched.group(1)
      if token == '{':
        # If namespace or class hasn't seen a opening brace yet, mark
        # namespace/class head as complete.  Push a new block onto the
        # stack otherwise.
        if not self.SeenOpenBrace():
          self.stack[-1].seen_open_brace = True
        elif Match(r'^extern\s*"[^"]*"\s*\{', line):
          self.stack.append(_ExternCInfo(linenum))
        else:
          self.stack.append(_BlockInfo(linenum, True))
          if _MATCH_ASM.match(line):
            self.stack[-1].inline_asm = _BLOCK_ASM

      elif token == ';' or token == ')':
        # If we haven't seen an opening brace yet, but we already saw
        # a semicolon, this is probably a forward declaration.  Pop
        # the stack for these.
        #
        # Similarly, if we haven't seen an opening brace yet, but we
        # already saw a closing parenthesis, then these are probably
        # function arguments with extra "class" or "struct" keywords.
        # Also pop these stack for these.
        if not self.SeenOpenBrace():
          self.stack.pop()
      else:  # token == '}'
        # Perform end of block checks and pop the stack.
        if self.stack:
          self.stack[-1].CheckEnd(filename, clean_lines, linenum, error)
          self.stack.pop()
      line = matched.group(2)

  def InnermostClass(self):
    """Get class info on the top of the stack.

    Returns:
      A _ClassInfo object if we are inside a class, or None otherwise.
    """
    # Walk from the innermost block outwards.
    for block in reversed(self.stack):
      if isinstance(block, _ClassInfo):
        return block
    return None

  def CheckCompletedBlocks(self, filename, error):
    """Checks that all classes and namespaces have been completely parsed.

    Call this when all lines in a file have been processed.
    Args:
      filename: The name of the current file.
      error: The function to call with any errors found.
    """
    # Note: This test can result in false positives if #ifdef constructs
    # get in the way of brace matching. See the testBuildClass test in
    # cpplint_unittest.py for an example of this.
    for obj in self.stack:
      if isinstance(obj, _ClassInfo):
        error(filename, obj.starting_linenum, 'build/class', 5,
              'Failed to find complete declaration of class %s' %
              obj.name)
      elif isinstance(obj, _NamespaceInfo):
        error(filename, obj.starting_linenum, 'build/namespaces', 5,
              'Failed to find complete declaration of namespace %s' %
              obj.name)
+ - put storage class first (e.g. "static const" instead of "const static"). + - "%lld" instead of %qd" in printf-type functions. + - "%1$d" is non-standard in printf-type functions. + - "\%" is an undefined character escape sequence. + - text after #endif is not allowed. + - invalid inner-style forward declaration. + - >? and ?= and )\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', + line): + error(filename, linenum, 'build/deprecated', 3, + '>? and ))?' + # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' + error(filename, linenum, 'runtime/member_string_references', 2, + 'const string& members are dangerous. It is much better to use ' + 'alternatives, such as pointers or simple constants.') + + # Everything else in this function operates on class declarations. + # Return early if the top of the nesting stack is not a class, or if + # the class head is not completed yet. + classinfo = nesting_state.InnermostClass() + if not classinfo or not classinfo.seen_open_brace: + return + + # The class may have been declared with namespace or classname qualifiers. + # The constructor and destructor will not have those qualifiers. + base_classname = classinfo.name.split('::')[-1] + + # Look for single-argument constructors that aren't marked explicit. + # Technically a valid construct, but against style. + explicit_constructor_match = Match( + r'\s+(?:(?:inline|constexpr)\s+)*(explicit\s+)?' 
+ r'(?:(?:inline|constexpr)\s+)*%s\s*' + r'\(((?:[^()]|\([^()]*\))*)\)' + % re.escape(base_classname), + line) + + if explicit_constructor_match: + is_marked_explicit = explicit_constructor_match.group(1) + + if not explicit_constructor_match.group(2): + constructor_args = [] + else: + constructor_args = explicit_constructor_match.group(2).split(',') + + # collapse arguments so that commas in template parameter lists and function + # argument parameter lists don't split arguments in two + i = 0 + while i < len(constructor_args): + constructor_arg = constructor_args[i] + while (constructor_arg.count('<') > constructor_arg.count('>') or + constructor_arg.count('(') > constructor_arg.count(')')): + constructor_arg += ',' + constructor_args[i + 1] + del constructor_args[i + 1] + constructor_args[i] = constructor_arg + i += 1 + + variadic_args = [arg for arg in constructor_args if '&&...' in arg] + defaulted_args = [arg for arg in constructor_args if '=' in arg] + noarg_constructor = (not constructor_args or # empty arg list + # 'void' arg specifier + (len(constructor_args) == 1 and + constructor_args[0].strip() == 'void')) + onearg_constructor = ((len(constructor_args) == 1 and # exactly one arg + not noarg_constructor) or + # all but at most one arg defaulted + (len(constructor_args) >= 1 and + not noarg_constructor and + len(defaulted_args) >= len(constructor_args) - 1) or + # variadic arguments with zero or one argument + (len(constructor_args) <= 2 and + len(variadic_args) >= 1)) + initializer_list_constructor = bool( + onearg_constructor and + Search(r'\bstd\s*::\s*initializer_list\b', constructor_args[0])) + copy_constructor = bool( + onearg_constructor and + Match(r'((const\s+(volatile\s+)?)?|(volatile\s+(const\s+)?))?' 
def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error):
  """Checks for the correctness of various spacing around function calls.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Function calls often sit inside if/for/while/switch conditions, which
  # have more liberal spacing conventions of their own.  When the line is
  # such a statement, only the text between its parentheses is examined;
  # otherwise the whole line is.
  control_patterns = (r'\bif\s*\((.*)\)\s*{',
                      r'\bfor\s*\((.*)\)\s*{',
                      r'\bwhile\s*\((.*)\)\s*[{;]',
                      r'\bswitch\s*\((.*)\)\s*{')
  fncall = line
  for control_pattern in control_patterns:
    control_match = Search(control_pattern, line)
    if control_match:
      fncall = control_match.group(1)
      break

  # Nothing below applies to:
  #  - text that still contains a control structure or a keyword that is
  #    legitimately followed by " (",
  #  - pointers/references to functions: " (something)(maybe-something)"
  #    or " (something)(maybe-something,",
  #  - pointers/references to arrays: " (something)[something]".
  # The contents of [] are assumed short enough never to wrap.
  skip_patterns = (
      r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b',
      r' \([^)]+\)\([^)]*(\)|,$)',
      r' \([^)]+\)\[[^\]]+\]')
  if any(Search(skip_pattern, fncall) for skip_pattern in skip_patterns):
    return

  # Space immediately inside the opening paren, e.g. "f( 3, 4 )".
  if Search(r'\w\s*\(\s(?!\s*\\$)', fncall):  # a ( used for a fn call
    error(filename, linenum, 'whitespace/parens', 4,
          'Extra space after ( in function call')
  elif Search(r'\(\s+(?!(\s*\\)|\()', fncall):
    error(filename, linenum, 'whitespace/parens', 2,
          'Extra space after (')

  # Space between a name and its "(".  Inline asm, #define / typedef /
  # using-alias lines, pointer-to-member declarators and "case (" are
  # exempt.
  if Search(r'\w\s+\(', fncall):
    exempt_patterns = (
        r'_{0,2}asm_{0,2}\s+_{0,2}volatile_{0,2}\s+\(',
        r'#\s*define|typedef|using\s+\w+\s*=',
        r'\w\s+\((\w+::)*\*\w+\)\(',
        r'\bcase\s+\(')
    if not any(Search(exempt, fncall) for exempt in exempt_patterns):
      # TODO(unknown): Space after an operator function seem to be a common
      # error, silence those for now by restricting them to highest verbosity.
      confidence = 0 if Search(r'\boperator_*\b', line) else 4
      error(filename, linenum, 'whitespace/parens', confidence,
            'Extra space before ( in function call')

  # If the ) is followed only by a newline or a { + newline, assume it's
  # part of a control statement (if/while/etc), and don't complain
  if Search(r'[^)]\s+\)\s*[^{\s]', fncall):
    # A ")" preceded by nothing but whitespace gets a more descriptive
    # message.
    if Search(r'^\s+\)', fncall):
      message = 'Closing ) should be moved to the previous line'
    else:
      message = 'Extra space before )'
    error(filename, linenum, 'whitespace/parens', 2, message)


def IsBlankLine(line):
  """Returns true if the given line is blank.

  A line counts as blank when it is empty or made up of white space only.

  Args:
    line: A line of a string.

  Returns:
    True, if the given line is blank.
  """
  if not line:
    return True
  return line.isspace()


def CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line,
                                 error):
  """Runs the namespace indentation check for `line` when it applies.

  Args:
    filename: The name of the current file.
    nesting_state: A NestingState instance; its `stack` and
      `previous_stack_top` are read.
    clean_lines: A CleansedLines instance; its `elided` lines are passed on.
    line: Passed through unchanged to the helpers.
    error: The function to call with any errors found.
  """
  stack = nesting_state.stack
  previous_top = nesting_state.previous_stack_top

  # The innermost block is a candidate when it sits directly inside a
  # namespace that was already the innermost block before this line.
  is_namespace_indent_item = (
      len(stack) > 1 and
      stack[-1].check_namespace_indentation and
      isinstance(previous_top, _NamespaceInfo) and
      previous_top == stack[-2])

  elided_lines = clean_lines.elided
  if not ShouldCheckNamespaceIndentation(nesting_state,
                                         is_namespace_indent_item,
                                         elided_lines, line):
    return
  CheckItemIndentationInNamespace(filename, elided_lines, line, error)
+ Only checks unindented functions, so class members are unchecked. + Trivial bodies are unchecked, so constructors with huge initializer lists + may be missed. + Blank/comment lines are not counted so as to avoid encouraging the removal + of vertical space and comments just to get through a lint check. + NOLINT *on the last line of a function* disables this check. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + function_state: Current function name and lines in body so far. + error: The function to call with any errors found. + """ + lines = clean_lines.lines + line = lines[linenum] + joined_line = '' + + starting_func = False + regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... + match_result = Match(regexp, line) + if match_result: + # If the name is all caps and underscores, figure it's a macro and + # ignore it, unless it's TEST or TEST_F. + function_name = match_result.group(1).split()[-1] + if function_name == 'TEST' or function_name == 'TEST_F' or ( + not Match(r'[A-Z_]+$', function_name)): + starting_func = True + + if starting_func: + body_found = False + for start_linenum in xrange(linenum, clean_lines.NumLines()): + start_line = lines[start_linenum] + joined_line += ' ' + start_line.lstrip() + if Search(r'(;|})', start_line): # Declarations and trivial functions + body_found = True + break # ... ignore + if Search(r'{', start_line): + body_found = True + function = Search(r'((\w|:)*)\(', line).group(1) + if Match(r'TEST', function): # Handle TEST... macros + parameter_regexp = Search(r'(\(.*\))', joined_line) + if parameter_regexp: # Ignore bad syntax + function += parameter_regexp.group(1) + else: + function += '()' + function_state.Begin(function) + break + if not body_found: + # No body for the function (or evidence of a non-function) was found. 
# Recognises a TODO at the start of a "//" comment.  Groups: (1) whitespace
# between "//" and "TODO", (2) optional "(username)", (3) the single
# whitespace character (or end of string) after the optional colon.
_RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?')


def CheckComment(line, filename, linenum, next_line_start, error):
  """Checks for common mistakes in comments.

  Args:
    line: The line in question.
    filename: The name of the current file.
    linenum: The number of the line to check.
    next_line_start: The first non-whitespace column of the next line.
    error: The function to call with any errors found.
  """
  slash_index = line.find('//')
  if slash_index == -1:
    return

  # An odd number of double quotes before the "//" (ignoring backslash
  # escapes) means it may be inside a string literal; leave it alone.
  code_before = line[0:slash_index]
  if re.sub(r'\\.', '', code_before).count('"') % 2 != 0:
    return

  # Allow one space for new scopes, two spaces otherwise:
  opens_scope = (Match(r'^.*{ *//', line) and
                 next_line_start == slash_index)
  if not opens_scope:
    no_gap = (slash_index >= 1 and
              line[slash_index - 1] not in string.whitespace)
    one_char_gap = (slash_index >= 2 and
                    line[slash_index - 2] not in string.whitespace)
    if no_gap or one_char_gap:
      error(filename, linenum, 'whitespace/comments', 2,
            'At least two spaces is best between code and comments')

  # Checks for common mistakes in TODO comments.
  comment = line[slash_index:]
  todo = _RE_PATTERN_TODO.match(comment)
  if todo:
    leading_whitespace, username, middle_whitespace = todo.groups()

    # One whitespace is correct; zero whitespace is handled elsewhere.
    if len(leading_whitespace) > 1:
      error(filename, linenum, 'whitespace/todo', 2,
            'Too many spaces before TODO')

    if not username:
      error(filename, linenum, 'readability/todo', 2,
            'Missing username in TODO; it should look like '
            '"// TODO(my_username): Stuff."')

    # Only a single space or end-of-comment is accepted here; an unmatched
    # group (None) is reported as well.
    if middle_whitespace not in (' ', ''):
      error(filename, linenum, 'whitespace/todo', 2,
            'TODO(my_username) should be followed by a space')

  # If the comment contains an alphanumeric character, there
  # should be a space somewhere between it and the // unless
  # it's a /// or //! Doxygen comment.
  lacks_space = Match(r'//[^ ]*\w', comment)
  if lacks_space and not Match(r'(///|//\!)(\s+|$)', comment):
    error(filename, linenum, 'whitespace/comments', 4,
          'Should have a space between // and comment')
+ # Don't want to use "raw" either, because we don't want to check inside C++11 + # raw strings, + raw = clean_lines.lines_without_raw_strings + line = raw[linenum] + + # Before nixing comments, check if the line is blank for no good + # reason. This includes the first line after a block is opened, and + # blank lines at the end of a function (ie, right before a line like '}' + # + # Skip all the blank line checks if we are immediately inside a + # namespace body. In other words, don't issue blank line warnings + # for this block: + # namespace { + # + # } + # + # A warning about missing end of namespace comments will be issued instead. + # + # Also skip blank line checks for 'extern "C"' blocks, which are formatted + # like namespaces. + if (IsBlankLine(line) and + not nesting_state.InNamespaceBody() and + not nesting_state.InExternC()): + elided = clean_lines.elided + prev_line = elided[linenum - 1] + prevbrace = prev_line.rfind('{') + # TODO(unknown): Don't complain if line before blank line, and line after, + # both start with alnums and are indented the same amount. + # This ignores whitespace at the start of a namespace block + # because those are not usually indented. + if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: + # OK, we have a blank line at the start of a code block. Before we + # complain, we check if it is an exception to the rule: The previous + # non-empty line has the parameters of a function header that are indented + # 4 spaces (because they did not fit in a 80 column line when placed on + # the same line as the function name). We also check for the case where + # the previous line is indented 6 spaces, which may happen when the + # initializers of a constructor do not fit into a 80 column line. + exception = False + if Match(r' {6}\w', prev_line): # Initializer list? + # We are looking for the opening column of initializer list, which + # should be indented 4 spaces to cause 6 space indentation afterwards. 
+ search_position = linenum-2 + while (search_position >= 0 + and Match(r' {6}\w', elided[search_position])): + search_position -= 1 + exception = (search_position >= 0 + and elided[search_position][:5] == ' :') + else: + # Search for the function arguments or an initializer list. We use a + # simple heuristic here: If the line is indented 4 spaces; and we have a + # closing paren, without the opening paren, followed by an opening brace + # or colon (for initializer lists) we assume that it is the last line of + # a function header. If we have a colon indented 4 spaces, it is an + # initializer list. + exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', + prev_line) + or Match(r' {4}:', prev_line)) + + if not exception: + error(filename, linenum, 'whitespace/blank_line', 2, + 'Redundant blank line at the start of a code block ' + 'should be deleted.') + # Ignore blank lines at the end of a block in a long if-else + # chain, like this: + # if (condition1) { + # // Something followed by a blank line + # + # } else if (condition2) { + # // Something else + # } + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + if (next_line + and Match(r'\s*}', next_line) + and next_line.find('} else ') == -1): + error(filename, linenum, 'whitespace/blank_line', 3, + 'Redundant blank line at the end of a code block ' + 'should be deleted.') + + matched = Match(r'\s*(public|protected|private):', prev_line) + if matched: + error(filename, linenum, 'whitespace/blank_line', 3, + 'Do not leave a blank line after "%s:"' % matched.group(1)) + + # Next, check comments + next_line_start = 0 + if linenum + 1 < clean_lines.NumLines(): + next_line = raw[linenum + 1] + next_line_start = len(next_line) - len(next_line.lstrip()) + CheckComment(line, filename, linenum, next_line_start, error) + + # get rid of comments and strings + line = clean_lines.elided[linenum] + + # You shouldn't have spaces before your brackets, except maybe after + # 'delete []', 'return 
def CheckOperatorSpacing(filename, clean_lines, linenum, error):
  """Checks for horizontal spacing around operators.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  category = 'whitespace/operators'
  line = clean_lines.elided[linenum]

  # Operator methods are not subject to the spacing checks.  Overwrite the
  # operator symbol with underscores, which keeps every other character in
  # its column, and repeat until no "operator<symbol>(" is left so that
  # operators calling operators are covered too.
  operator_pattern = r'^(.*\boperator\b)(\S+)(\s*\(.*)$'
  operator_decl = Match(operator_pattern, line)
  while operator_decl:
    head, symbol, tail = operator_decl.groups()
    line = head + '_' * len(symbol) + tail
    operator_decl = Match(operator_pattern, line)

  # "=" with a word character or "." directly on either side.  Lines with
  # if/while/for are let through ("if ( (a=Foo()) == 0 )"), as are lines
  # containing a compound or comparison operator, or "operator=".
  tight_assignment = Search(r'[\w.]=', line) or Search(r'=[\w.]', line)
  if tight_assignment:
    excused = (
        Search(r'\b(if|while|for) ', line) or
        # Operators taken from [lex.operators] in C++11 standard.
        Search(r'(>=|<=|==|!=|&=|\^=|\|=|\+=|\*=|\/=|\%=)', line) or
        Search(r'operator=', line))
    if not excused:
      error(filename, linenum, category, 4,
            'Missing spaces around =')

  # It's ok not to have spaces around binary operators like + - * /, but if
  # there's too little whitespace, we get concerned.  It's hard to tell,
  # though, so we punt on this one for now.  TODO.

  # Binary comparison / logical operators need whitespace on both sides.
  # <= and >= are looked at first so they are not mistaken for < and >.
  # An operator followed by a comma is assumed to be a macro argument.
  # && is left out on purpose: rvalue references cause too many false
  # positives.
  comparison = Search(r'[^<>=!\s](==|!=|<=|>=|\|\|)[^<>=!\s,;\)]', line)
  if comparison:
    error(filename, linenum, category, 3,
          'Missing spaces around %s' % comparison.group(1))
  elif not Match(r'#.*include', line):
    # "<" with no space on either side.  Requiring both sides to be tight
    # avoids some false positives with shifts.  Reported only when
    # CloseExpression finds no closing partner for it.
    tight_less = Match(r'^(.*[^\s<])<[^\s=<,]', line)
    if tight_less:
      (_, _, end_pos) = CloseExpression(
          clean_lines, linenum, len(tight_less.group(1)))
      if end_pos <= -1:
        error(filename, linenum, category, 3,
              'Missing spaces around <')

    # Same idea for ">", using ReverseCloseExpression to look for the
    # opening partner.
    tight_greater = Match(r'^(.*[^-\s>])>[^\s=>,]', line)
    if tight_greater:
      (_, _, start_pos) = ReverseCloseExpression(
          clean_lines, linenum, len(tight_greater.group(1)))
      if start_pos <= -1:
        error(filename, linenum, category, 3,
              'Missing spaces around >')

  # "<<" without spaces is accepted between two digits (10<<20) and in
  # "operator<<;", and never flagged right after "(" since that tends to
  # be a macro dealing with operators.
  shift = Search(
      r'(operator|[^\s(<])(?:L|UL|LL|ULL|l|ul|ll|ull)?<<([^\s,=<])', line)
  if shift:
    left, right = shift.group(1), shift.group(2)
    digit_shift = left.isdigit() and right.isdigit()
    bare_operator_decl = left == 'operator' and right == ';'
    if not digit_shift and not bare_operator_decl:
      error(filename, linenum, category, 3,
            'Missing spaces around <<')

  # ">>" is allowed almost everywhere because C++11 lets it close nested
  # templates.  Directly followed by a letter or underscore it is most
  # likely a right shift, since a template close would be separated from
  # the following identifier by a space.
  if Search(r'>>[a-zA-Z_]', line):
    error(filename, linenum, category, 3,
          'Missing spaces around >>')

  # There shouldn't be space around unary operators
  unary = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
  if unary:
    error(filename, linenum, category, 4,
          'Extra space for operator %s' % unary.group(1))
+ linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # No spaces after an if, while, switch, or for + match = Search(r' (if\(|for\(|while\(|switch\()', line) + if match: + error(filename, linenum, 'whitespace/parens', 5, + 'Missing space before ( in %s' % match.group(1)) + + # For if/for/while/switch, the left and right parens should be + # consistent about how many spaces are inside the parens, and + # there should either be zero or one spaces inside the parens. + # We don't want: "if ( foo)" or "if ( foo )". + # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed. + match = Search(r'\b(if|for|while|switch)\s*' + r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', + line) + if match: + if len(match.group(2)) != len(match.group(4)): + if not (match.group(3) == ';' and + len(match.group(2)) == 1 + len(match.group(4)) or + not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)): + error(filename, linenum, 'whitespace/parens', 5, + 'Mismatching spaces inside () in %s' % match.group(1)) + if len(match.group(2)) not in [0, 1]: + error(filename, linenum, 'whitespace/parens', 5, + 'Should have zero or one spaces inside ( and ) in %s' % + match.group(1)) + +def CheckCommaSpacing(filename, clean_lines, linenum, error): + """Checks for horizontal spacing near commas and semicolons. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + raw = clean_lines.lines_without_raw_strings + line = clean_lines.elided[linenum] + + # You should always have a space after a comma (either as fn arg or operator) + # + # This does not apply when the non-space character following the + # comma is another comma, since the only time when that happens is + # for empty macro arguments. 
+ # + # We run this check in two passes: first pass on elided lines to + # verify that lines contain missing whitespaces, second pass on raw + # lines to confirm that those missing whitespaces are not due to + # elided comments. + if (Search(r',[^,\s]', ReplaceAll(r'\boperator\s*,\s*\(', 'F(', line)) and + Search(r',[^,\s]', raw[linenum])): + error(filename, linenum, 'whitespace/comma', 3, + 'Missing space after ,') + + # You should always have a space after a semicolon + # except for few corner cases + # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more + # space after ; + if Search(r';[^\s};\\)/]', line): + error(filename, linenum, 'whitespace/semicolon', 3, + 'Missing space after ;') + +def _IsType(clean_lines, nesting_state, expr): + """Check if expression looks like a type name, returns true if so. + + Args: + clean_lines: A CleansedLines instance containing the file. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + expr: The expression to check. + Returns: + True, if token looks like a type. + """ + # Keep only the last token in the expression + last_word = Match(r'^.*(\b\S+)$', expr) + if last_word: + token = last_word.group(1) + else: + token = expr + + # Match native types and stdint types + if _TYPES.match(token): + return True + + # Try a bit harder to match templated types. Walk up the nesting + # stack until we find something that resembles a typename + # declaration for what we are looking for. + typename_pattern = (r'\b(?:typename|class|struct)\s+' + re.escape(token) + + r'\b') + block_index = len(nesting_state.stack) - 1 + while block_index >= 0: + if isinstance(nesting_state.stack[block_index], _NamespaceInfo): + return False + + # Found where the opening brace is. We want to scan from this + # line up to the beginning of the function, minus a few lines. + # template + # class C + # : public ... 
{ // start scanning here + last_line = nesting_state.stack[block_index].starting_linenum + + next_block_start = 0 + if block_index > 0: + next_block_start = nesting_state.stack[block_index - 1].starting_linenum + first_line = last_line + while first_line >= next_block_start: + if clean_lines.elided[first_line].find('template') >= 0: + break + first_line -= 1 + if first_line < next_block_start: + # Didn't find any "template" keyword before reaching the next block, + # there are probably no template things to check for this block + block_index -= 1 + continue + + # Look for typename in the specified range + for i in xrange(first_line, last_line + 1, 1): + if Search(typename_pattern, clean_lines.elided[i]): + return True + block_index -= 1 + + return False + +def CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error): + """Checks for horizontal spacing near commas. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Except after an opening paren, or after another opening brace (in case of + # an initializer list, for instance), you should have spaces before your + # braces when they are delimiting blocks, classes, namespaces etc. + # And since you should never have braces at the beginning of a line, + # this is an easy test. Except that braces used for initialization don't + # follow the same rule; we often don't want spaces before those. + match = Match(r'^(.*[^ ({>]){', line) + + if match: + # Try a bit harder to check for brace initialization. This + # happens in one of the following forms: + # Constructor() : initializer_list_{} { ... 
} + # Constructor{}.MemberFunction() + # Type variable{}; + # FunctionCall(type{}, ...); + # LastArgument(..., type{}); + # LOG(INFO) << type{} << " ..."; + # map_of_type[{...}] = ...; + # ternary = expr ? new type{} : nullptr; + # OuterTemplate{}> + # + # We check for the character following the closing brace, and + # silence the warning if it's one of those listed above, i.e. + # "{.;,)<>]:". + # + # To account for nested initializer list, we allow any number of + # closing braces up to "{;,)<". We can't simply silence the + # warning on first sight of closing brace, because that would + # cause false negatives for things that are not initializer lists. + # Silence this: But not this: + # Outer{ if (...) { + # Inner{...} if (...){ // Missing space before { + # }; } + # + # There is a false negative with this approach if people inserted + # spurious semicolons, e.g. "if (cond){};", but we will catch the + # spurious semicolon with a separate check. + leading_text = match.group(1) + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + trailing_text = '' + if endpos > -1: + trailing_text = endline[endpos:] + for offset in xrange(endlinenum + 1, + min(endlinenum + 3, clean_lines.NumLines() - 1)): + trailing_text += clean_lines.elided[offset] + # We also suppress warnings for `uint64_t{expression}` etc., as the style + # guide recommends brace initialization for integral types to avoid + # overflow/truncation. + if (not Match(r'^[\s}]*[{.;,)<>\]:]', trailing_text) + and not _IsType(clean_lines, nesting_state, leading_text)): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before {') + + # Make sure '} else {' has spaces. + if Search(r'}else', line): + error(filename, linenum, 'whitespace/braces', 5, + 'Missing space before else') + + # You shouldn't have a space before a semicolon at the end of the line. + # There's a special case for "for" since the style guide allows space before + # the semicolon there. 
+ if Search(r':\s*;\s*$', line): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Semicolon defining empty statement. Use {} instead.') + elif Search(r'^\s*;\s*$', line): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Line contains only semicolon. If this should be an empty statement, ' + 'use {} instead.') + elif (Search(r'\s+;\s*$', line) and + not Search(r'\bfor\b', line)): + error(filename, linenum, 'whitespace/semicolon', 5, + 'Extra space before last semicolon. If this should be an empty ' + 'statement, use {} instead.') + +def IsDecltype(clean_lines, linenum, column): + """Check if the token ending on (linenum, column) is decltype(). + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: the number of the line to check. + column: end column of the token to check. + Returns: + True if this token is decltype() expression, False otherwise. + """ + (text, _, start_col) = ReverseCloseExpression(clean_lines, linenum, column) + if start_col < 0: + return False + if Search(r'\bdecltype\s*$', text[0:start_col]): + return True + return False + +def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error): + """Checks for additional blank line issues related to sections. + + Currently the only thing checked here is blank line before protected/private. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + class_info: A _ClassInfo objects. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Skip checks if the class is small, where small means 25 lines or less. + # 25 lines seems like a good cutoff since that's the usual height of + # terminals, and any class that can't fit in one screen can't really + # be considered "small". + # + # Also skip checks if we are on the first line. This accounts for + # classes that look like + # class Foo { public: ... 
}; + # + # If we didn't find the end of the class, last_line would be zero, + # and the check will be skipped by the first condition. + if (class_info.last_line - class_info.starting_linenum <= 24 or + linenum <= class_info.starting_linenum): + return + + matched = Match(r'\s*(public|protected|private):', clean_lines.lines[linenum]) + if matched: + # Issue warning if the line before public/protected/private was + # not a blank line, but don't do this if the previous line contains + # "class" or "struct". This can happen two ways: + # - We are at the beginning of the class. + # - We are forward-declaring an inner class that is semantically + # private, but needed to be public for implementation reasons. + # Also ignores cases where the previous line ends with a backslash as can be + # common when defining classes in C macros. + prev_line = clean_lines.lines[linenum - 1] + if (not IsBlankLine(prev_line) and + not Search(r'\b(class|struct)\b', prev_line) and + not Search(r'\\$', prev_line)): + # Try a bit harder to find the beginning of the class. This is to + # account for multi-line base-specifier lists, e.g.: + # class Derived + # : public Base { + end_class_head = class_info.starting_linenum + for i in range(class_info.starting_linenum, linenum): + if Search(r'\{\s*$', clean_lines.lines[i]): + end_class_head = i + break + if end_class_head < linenum - 1: + error(filename, linenum, 'whitespace/blank_line', 3, + '"%s:" should be preceded by a blank line' % matched.group(1)) + +def GetPreviousNonBlankLine(clean_lines, linenum): + """Return the most recent non-blank line and its line number. + + Args: + clean_lines: A CleansedLines instance containing the file contents. + linenum: The number of the line to check. + + Returns: + A tuple with two elements. The first element is the contents of the last + non-blank line before the current line, or the empty string if this is the + first non-blank line. 
The second is the line number of that line, or -1 + if this is the first non-blank line. + """ + + prevlinenum = linenum - 1 + while prevlinenum >= 0: + prevline = clean_lines.elided[prevlinenum] + if not IsBlankLine(prevline): # if not a blank line... + return (prevline, prevlinenum) + prevlinenum -= 1 + return ('', -1) + +def CheckBraces(filename, clean_lines, linenum, error): + """Looks for misplaced braces (e.g. at the end of line). + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] # get rid of comments and strings + + if Match(r'\s*{\s*$', line): + # We allow an open brace to start a line in the case where someone is using + # braces in a block to explicitly create a new scope, which is commonly used + # to control the lifetime of stack-allocated variables. Braces are also + # used for brace initializers inside function calls. We don't detect this + # perfectly: we just don't complain if the last non-whitespace character on + # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the + # previous line starts a preprocessor block. We also allow a brace on the + # following line if it is part of an array initialization and would not fit + # within the 80 character limit of the preceding line. + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if (not Search(r'[,;:}{(]\s*$', prevline) and + not Match(r'\s*#', prevline) and + not (GetLineWidth(prevline) > _line_length - 2 and '[]' in prevline)): + error(filename, linenum, 'whitespace/braces', 4, + '{ should almost always be at the end of the previous line') + + # An else clause should be on the same line as the preceding closing brace. 
+ if Match(r'\s*else\b\s*(?:if\b|\{|$)', line): + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if Match(r'\s*}\s*$', prevline): + error(filename, linenum, 'whitespace/newline', 4, + 'An else should appear on the same line as the preceding }') + + # If braces come on one side of an else, they should be on both. + # However, we have to worry about "else if" that spans multiple lines! + if Search(r'else if\s*\(', line): # could be multi-line if + brace_on_left = bool(Search(r'}\s*else if\s*\(', line)) + # find the ( after the if + pos = line.find('else if') + pos = line.find('(', pos) + if pos > 0: + (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos) + brace_on_right = endline[endpos:].find('{') != -1 + if brace_on_left != brace_on_right: # must be brace after if + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + elif Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line): + error(filename, linenum, 'readability/braces', 5, + 'If an else has a brace on one side, it should have it on both') + + # Likewise, an else should never have the else clause on the same line + if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line): + error(filename, linenum, 'whitespace/newline', 4, + 'Else clause should never be on same line as else (use 2 lines)') + + # In the same way, a do/while should never be on one line + if Match(r'\s*do [^\s{]', line): + error(filename, linenum, 'whitespace/newline', 4, + 'do/while clauses should not be on a single line') + + # Check single-line if/else bodies. The style guide says 'curly braces are not + # required for single-line statements'. We additionally allow multi-line, + # single statements, but we reject anything with more than one semicolon in + # it. 
This means that the first semicolon after the if should be at the end of + # its line, and the line after that should have an indent level equal to or + # lower than the if. We also check for ambiguous if/else nesting without + # braces. + if_else_match = Search(r'\b(if\s*(|constexpr)\s*\(|else\b)', line) + if if_else_match and not Match(r'\s*#', line): + if_indent = GetIndentLevel(line) + endline, endlinenum, endpos = line, linenum, if_else_match.end() + if_match = Search(r'\bif\s*(|constexpr)\s*\(', line) + if if_match: + # This could be a multiline if condition, so find the end first. + pos = if_match.end() - 1 + (endline, endlinenum, endpos) = CloseExpression(clean_lines, linenum, pos) + # Check for an opening brace, either directly after the if or on the next + # line. If found, this isn't a single-statement conditional. + if (not Match(r'\s*{', endline[endpos:]) + and not (Match(r'\s*$', endline[endpos:]) + and endlinenum < (len(clean_lines.elided) - 1) + and Match(r'\s*{', clean_lines.elided[endlinenum + 1]))): + while (endlinenum < len(clean_lines.elided) + and ';' not in clean_lines.elided[endlinenum][endpos:]): + endlinenum += 1 + endpos = 0 + if endlinenum < len(clean_lines.elided): + endline = clean_lines.elided[endlinenum] + # We allow a mix of whitespace and closing braces (e.g. for one-liner + # methods) and a single \ after the semicolon (for macros) + endpos = endline.find(';') + if not Match(r';[\s}]*(\\?)$', endline[endpos:]): + # Semicolon isn't the last character, there's something trailing. + # Output a warning if the semicolon is not contained inside + # a lambda expression. 
+ if not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}]*\}\s*\)*[;,]\s*$', + endline): + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + elif endlinenum < len(clean_lines.elided) - 1: + # Make sure the next line is dedented + next_line = clean_lines.elided[endlinenum + 1] + next_indent = GetIndentLevel(next_line) + # With ambiguous nested if statements, this will error out on the + # if that *doesn't* match the else, regardless of whether it's the + # inner one or outer one. + if (if_match and Match(r'\s*else\b', next_line) + and next_indent != if_indent): + error(filename, linenum, 'readability/braces', 4, + 'Else clause should be indented at the same level as if. ' + 'Ambiguous nested if/else chains require braces.') + elif next_indent > if_indent: + error(filename, linenum, 'readability/braces', 4, + 'If/else bodies with multiple statements require braces') + +def CheckTrailingSemicolon(filename, clean_lines, linenum, error): + """Looks for redundant trailing semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + line = clean_lines.elided[linenum] + + # Block bodies should not be followed by a semicolon. Due to C++11 + # brace initialization, there are more places where semicolons are + # required than not, so we explicitly list the allowed rules rather + # than listing the disallowed ones. These are the places where "};" + # should be replaced by just "}": + # 1. Some flavor of block following closing parenthesis: + # for (;;) {}; + # while (...) {}; + # switch (...) {}; + # Function(...) {}; + # if (...) {}; + # if (...) else if (...) {}; + # + # 2. else block: + # if (...) else {}; + # + # 3. const member function: + # Function(...) const {}; + # + # 4. Block following some statement: + # x = 42; + # {}; + # + # 5. 
Block at the beginning of a function: + # Function(...) { + # {}; + # } + # + # Note that naively checking for the preceding "{" will also match + # braces inside multi-dimensional arrays, but this is fine since + # that expression will not contain semicolons. + # + # 6. Block following another block: + # while (true) {} + # {}; + # + # 7. End of namespaces: + # namespace {}; + # + # These semicolons seems far more common than other kinds of + # redundant semicolons, possibly due to people converting classes + # to namespaces. For now we do not warn for this case. + # + # Try matching case 1 first. + match = Match(r'^(.*\)\s*)\{', line) + if match: + # Matched closing parenthesis (case 1). Check the token before the + # matching opening parenthesis, and don't warn if it looks like a + # macro. This avoids these false positives: + # - macro that defines a base class + # - multi-line macro that defines a base class + # - macro that defines the whole class-head + # + # But we still issue warnings for macros that we know are safe to + # warn, specifically: + # - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P + # - TYPED_TEST + # - INTERFACE_DEF + # - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED: + # + # We implement a list of safe macros instead of a list of + # unsafe macros, even though the latter appears less frequently in + # google code and would have been easier to implement. This is because + # the downside for getting the allowed checks wrong means some extra + # semicolons, while the downside for getting disallowed checks wrong + # would result in compile errors. 
+ # + # In addition to macros, we also don't want to warn on + # - Compound literals + # - Lambdas + # - alignas specifier with anonymous structs + # - decltype + closing_brace_pos = match.group(1).rfind(')') + opening_parenthesis = ReverseCloseExpression( + clean_lines, linenum, closing_brace_pos) + if opening_parenthesis[2] > -1: + line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]] + macro = Search(r'\b([A-Z_][A-Z0-9_]*)\s*$', line_prefix) + func = Match(r'^(.*\])\s*$', line_prefix) + if ((macro and + macro.group(1) not in ( + 'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST', + 'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED', + 'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or + (func and not Search(r'\boperator\s*\[\s*\]', func.group(1))) or + Search(r'\b(?:struct|union)\s+alignas\s*$', line_prefix) or + Search(r'\bdecltype$', line_prefix) or + Search(r'\s+=\s*$', line_prefix)): + match = None + if (match and + opening_parenthesis[1] > 1 and + Search(r'\]\s*$', clean_lines.elided[opening_parenthesis[1] - 1])): + # Multi-line lambda-expression + match = None + + else: + # Try matching cases 2-3. + match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line) + if not match: + # Try matching cases 4-6. These are always matched on separate lines. + # + # Note that we can't simply concatenate the previous line to the + # current line and do a single match, otherwise we may output + # duplicate warnings for the blank line case: + # if (cond) { + # // blank line + # } + prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0] + if prevline and Search(r'[;{}]\s*$', prevline): + match = Match(r'^(\s*)\{', line) + + # Check matching closing brace + if match: + (endline, endlinenum, endpos) = CloseExpression( + clean_lines, linenum, len(match.group(1))) + if endpos > -1 and Match(r'^\s*;', endline[endpos:]): + # Current {} pair is eligible for semicolon check, and we have found + # the redundant semicolon, output warning here. 
+ # + # Note: because we are scanning forward for opening braces, and + # outputting warnings for the matching closing brace, if there are + # nested blocks with trailing semicolons, we will get the error + # messages in reversed order. + + # We need to check the line forward for NOLINT + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[endlinenum-1], endlinenum-1, + error) + ParseNolintSuppressions(filename, raw_lines[endlinenum], endlinenum, + error) + + error(filename, endlinenum, 'readability/braces', 4, + "You don't need a ; after a }") + +def CheckEmptyBlockBody(filename, clean_lines, linenum, error): + """Look for empty loop/conditional body with only a single semicolon. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Search for loop keywords at the beginning of the line. Because only + # whitespaces are allowed before the keywords, this will also ignore most + # do-while-loops, since those lines should start with closing brace. + # + # We also check "if" blocks here, since an empty conditional block + # is likely an error. + line = clean_lines.elided[linenum] + matched = Match(r'\s*(for|while|if)\s*\(', line) + if matched: + # Find the end of the conditional expression. + (end_line, end_linenum, end_pos) = CloseExpression( + clean_lines, linenum, line.find('(')) + + # Output warning if what follows the condition expression is a semicolon. + # No warning for all other cases, including whitespace or newline, since we + # have a separate check for semicolons preceded by whitespace. 
+ if end_pos >= 0 and Match(r';', end_line[end_pos:]): + if matched.group(1) == 'if': + error(filename, end_linenum, 'whitespace/empty_conditional_body', 5, + 'Empty conditional bodies should use {}') + else: + error(filename, end_linenum, 'whitespace/empty_loop_body', 5, + 'Empty loop bodies should use {} or continue') + + # Check for if statements that have completely empty bodies (no comments) + # and no else clauses. + if end_pos >= 0 and matched.group(1) == 'if': + # Find the position of the opening { for the if statement. + # Return without logging an error if it has no brackets. + opening_linenum = end_linenum + opening_line_fragment = end_line[end_pos:] + # Loop until EOF or find anything that's not whitespace or opening {. + while not Search(r'^\s*\{', opening_line_fragment): + if Search(r'^(?!\s*$)', opening_line_fragment): + # Conditional has no brackets. + return + opening_linenum += 1 + if opening_linenum == len(clean_lines.elided): + # Couldn't find conditional's opening { or any code before EOF. + return + opening_line_fragment = clean_lines.elided[opening_linenum] + # Set opening_line (opening_line_fragment may not be entire opening line). + opening_line = clean_lines.elided[opening_linenum] + + # Find the position of the closing }. + opening_pos = opening_line_fragment.find('{') + if opening_linenum == end_linenum: + # We need to make opening_pos relative to the start of the entire line. + opening_pos += end_pos + (closing_line, closing_linenum, closing_pos) = CloseExpression( + clean_lines, opening_linenum, opening_pos) + if closing_pos < 0: + return + + # Now construct the body of the conditional. This consists of the portion + # of the opening line after the {, all lines until the closing line, + # and the portion of the closing line before the }. + if (clean_lines.raw_lines[opening_linenum] != + CleanseComments(clean_lines.raw_lines[opening_linenum])): + # Opening line ends with a comment, so conditional isn't empty. 
+ return + if closing_linenum > opening_linenum: + # Opening line after the {. Ignore comments here since we checked above. + bodylist = list(opening_line[opening_pos+1:]) + # All lines until closing line, excluding closing line, with comments. + bodylist.extend(clean_lines.raw_lines[opening_linenum+1:closing_linenum]) + # Closing line before the }. Won't (and can't) have comments. + bodylist.append(clean_lines.elided[closing_linenum][:closing_pos-1]) + body = '\n'.join(bodylist) + else: + # If statement has brackets and fits on a single line. + body = opening_line[opening_pos+1:closing_pos-1] + + # Check if the body is empty + if not _EMPTY_CONDITIONAL_BODY_PATTERN.search(body): + return + # The body is empty. Now make sure there's not an else clause. + current_linenum = closing_linenum + current_line_fragment = closing_line[closing_pos:] + # Loop until EOF or find anything that's not whitespace or else clause. + while Search(r'^\s*$|^(?=\s*else)', current_line_fragment): + if Search(r'^(?=\s*else)', current_line_fragment): + # Found an else clause, so don't log an error. + return + current_linenum += 1 + if current_linenum == len(clean_lines.elided): + break + current_line_fragment = clean_lines.elided[current_linenum] + + # The body is empty and there's no else clause until EOF or other code. + error(filename, end_linenum, 'whitespace/empty_if_body', 4, + ('If statement had no body and no else clause')) + +def FindCheckMacro(line): + """Find a replaceable CHECK-like macro. + + Args: + line: line to search on. + Returns: + (macro name, start position), or (None, -1) if no replaceable + macro is found. + """ + for macro in _CHECK_MACROS: + i = line.find(macro) + if i >= 0: + # Find opening parenthesis. Do a regular expression match here + # to make sure that we are matching the expected CHECK macro, as + # opposed to some other macro that happens to contain the CHECK + # substring. 
+ matched = Match(r'^(.*\b' + macro + r'\s*)\(', line) + if not matched: + continue + return (macro, len(matched.group(1))) + return (None, -1) + +def CheckCheck(filename, clean_lines, linenum, error): + """Checks the use of CHECK and EXPECT macros. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + + # Decide the set of replacement macros that should be suggested + lines = clean_lines.elided + (check_macro, start_pos) = FindCheckMacro(lines[linenum]) + if not check_macro: + return + + # Find end of the boolean expression by matching parentheses + (last_line, end_line, end_pos) = CloseExpression( + clean_lines, linenum, start_pos) + if end_pos < 0: + return + + # If the check macro is followed by something other than a + # semicolon, assume users will log their own custom error messages + # and don't suggest any replacements. + if not Match(r'\s*;', last_line[end_pos:]): + return + + if linenum == end_line: + expression = lines[linenum][start_pos + 1:end_pos - 1] + else: + expression = lines[linenum][start_pos + 1:] + for i in xrange(linenum + 1, end_line): + expression += lines[i] + expression += last_line[0:end_pos - 1] + + # Parse expression so that we can take parentheses into account. + # This avoids false positives for inputs like "CHECK((a < 4) == b)", + # which is not replaceable by CHECK_LE. + lhs = '' + rhs = '' + operator = None + while expression: + matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||' + r'==|!=|>=|>|<=|<|\()(.*)$', expression) + if matched: + token = matched.group(1) + if token == '(': + # Parenthesized operand + expression = matched.group(2) + (end, _) = FindEndOfExpressionInLine(expression, 0, ['(']) + if end < 0: + return # Unmatched parenthesis + lhs += '(' + expression[0:end] + expression = expression[end:] + elif token in ('&&', '||'): + # Logical and/or operators. 
This means the expression + # contains more than one term, for example: + # CHECK(42 < a && a < b); + # + # These are not replaceable with CHECK_LE, so bail out early. + return + elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'): + # Non-relational operator + lhs += token + expression = matched.group(2) + else: + # Relational operator + operator = token + rhs = matched.group(2) + break + else: + # Unparenthesized operand. Instead of appending to lhs one character + # at a time, we do another regular expression match to consume several + # characters at once if possible. Trivial benchmark shows that this + # is more efficient when the operands are longer than a single + # character, which is generally the case. + matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression) + if not matched: + matched = Match(r'^(\s*\S)(.*)$', expression) + if not matched: + break + lhs += matched.group(1) + expression = matched.group(2) + + # Only apply checks if we got all parts of the boolean expression + if not (lhs and operator and rhs): + return + + # Check that rhs do not contain logical operators. We already know + # that lhs is fine since the loop above parses out && and ||. + if rhs.find('&&') > -1 or rhs.find('||') > -1: + return + + # At least one of the operands must be a constant literal. This is + # to avoid suggesting replacements for unprintable things like + # CHECK(variable != iterator) + # + # The following pattern matches decimal, hex integers, strings, and + # characters (in that order). 
+ lhs = lhs.strip() + rhs = rhs.strip() + match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$' + if Match(match_constant, lhs) or Match(match_constant, rhs): + # Note: since we know both lhs and rhs, we can provide a more + # descriptive error message like: + # Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42) + # Instead of: + # Consider using CHECK_EQ instead of CHECK(a == b) + # + # We are still keeping the less descriptive message because if lhs + # or rhs gets long, the error message might become unreadable. + error(filename, linenum, 'readability/check', 2, + 'Consider using %s instead of %s(a %s b)' % ( + _CHECK_REPLACEMENT[check_macro][operator], + check_macro, operator)) + +def CheckAltTokens(filename, clean_lines, linenum, error): + """Check alternative keywords being used in boolean expressions. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + # Avoid preprocessor lines + if Match(r'^\s*#', line): + return + + # Last ditch effort to avoid multi-line comments. This will not help + # if the comment started before the current line or ended after the + # current line, but it catches most of the false positives. At least, + # it provides a way to workaround this warning for people who use + # multi-line comments in preprocessor macros. + # + # TODO(unknown): remove this once cpplint has better support for + # multi-line comments. + if line.find('/*') >= 0 or line.find('*/') >= 0: + return + + for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line): + error(filename, linenum, 'readability/alt_tokens', 2, + 'Use operator %s instead of %s' % ( + _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1))) + +def GetLineWidth(line): + """Determines the width of the line in column positions. 
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.  Non-Unicode input (for
    example a byte string) is measured with plain len().
  """
  # type(u'') is the interpreter's text type on both Python 2 and Python 3,
  # so this check does not rely on any module-level compatibility alias.
  if not isinstance(line, type(u'')):
    return len(line)

  # Issue 337
  # https://mail.python.org/pipermail/python-list/2012-August/628809.html
  # On narrow builds of Python <= 3.2 a non-BMP character is stored as a
  # surrogate pair, so the low surrogate must not be counted a second time.
  # The answer is the same for every character, so it is computed once here
  # instead of once per character.  The version test short-circuits, which
  # keeps the sysconfig lookup away from newer interpreters.
  # https://github.com/python/cpython/blob/2.7/Include/unicodeobject.h#L81
  narrow_build = (
      sys.version_info[:2] <= (3, 2) and
      not (sysconfig.get_config_var("Py_UNICODE_SIZE") >= 4))

  width = 0
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      width += 2
    elif not unicodedata.combining(uc):
      # https://github.com/python/cpython/blob/2.7/Objects/unicodeobject.c#L564
      if narrow_build and 0xDC00 <= ord(uc) <= 0xDFFF:
        width -= 1
      width += 1
  return width
def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can.  In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """

  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings,
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]
  prev = raw_lines[linenum - 1] if linenum > 0 else ''

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
  # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  scope_or_label_pattern = r'\s*(?:public|private|protected|signals)(?:\s+(?:slots\s*)?)?:\s*\\?$'
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  # There are certain situations we allow one space, notably for
  # section labels, and also lines containing multi-line raw strings.
  # We also don't check for lines that look like continuation lines
  # (of lines ending in double quotes, commas, equals, or angle brackets)
  # because the rules for how to indent those are non-trivial.
  if (not Search(r'[",=><] *$', prev) and
      (initial_spaces == 1 or initial_spaces == 3) and
      not Match(scope_or_label_pattern, cleansed_line) and
      not (clean_lines.raw_lines[linenum] != line and
           Match(r'^\s*""', line))):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')

  # Check if the line is a header guard.
  is_header_guard = False
  if IsHeaderExtension(file_extension):
    cppvar = GetHeaderGuardCPPVariable(filename)
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too.  It's possible to split these, but it makes them
  # harder to cut&paste.
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developers fault.
  #
  # Doxygen documentation copying can get pretty long when using an overloaded
  # function declaration
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^\s*//\s*[^\s]*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line) and
      not Match(r'^\s*/// [@\\](copydoc|copydetails|copybrief) .*$', line)):
    line_width = GetLineWidth(line)
    if line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  if (cleansed_line.count(';') > 1 and
      # allow simple single line lambdas
      not Match(r'^[^{};]*\[[^\[\]]*\][^{}]*\{[^{}\n\r]*\}',
                line) and
      cleansed_line.find('for') == -1):
    # The previous non-blank line is looked up once, and only after the
    # cheap tests above have passed.
    prev_code_line = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    # for loops are allowed two ;'s (and may run over two lines).
    not_for_continuation = (prev_code_line.find('for') == -1 or
                            prev_code_line.find(';') != -1)
    # It's ok to have many commands in a switch case that fits in 1 line
    one_line_case = ((cleansed_line.find('case ') != -1 or
                      cleansed_line.find('default:') != -1) and
                     cleansed_line.find('break;') != -1)
    if not_for_continuation and not one_line_case:
      error(filename, linenum, 'whitespace/newline', 0,
            'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckTrailingSemicolon(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckOperatorSpacing(filename, clean_lines, linenum, error)
  CheckParenthesisSpacing(filename, clean_lines, linenum, error)
  CheckCommaSpacing(filename, clean_lines, linenum, error)
  CheckBracesSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacingForFunctionCall(filename, clean_lines, linenum, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)


_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
def _DropCommonSuffixes(filename):
  """Removes a recognised trailing suffix such as _test.cc or -inl.h.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.  When no known suffix
    applies, only the file extension is dropped.
  """
  # Candidate endings for test sources, e.g. "test.cc".
  test_endings = (
      '%s.%s' % (test_suffix.lstrip('_'), ext)
      for test_suffix, ext in itertools.product(_test_suffixes,
                                                GetNonHeaderExtensions()))
  # Candidate endings for special-purpose headers, e.g. "inl.h".
  header_endings = (
      '%s.%s' % (kind, ext)
      for kind, ext in itertools.product(['inl', 'imp', 'internal'],
                                         GetHeaderExtensions()))

  for ending in itertools.chain(test_endings, header_endings):
    # The ending only counts when a '-' or '_' separator precedes it.
    cut = len(ending) + 1
    if (filename.endswith(ending) and len(filename) > len(ending) and
        filename[-cut] in ('-', '_')):
      return filename[:-cut]
  return os.path.splitext(filename)[0]
def _ClassifyInclude(fileinfo, include, used_angle_brackets, include_order="default"):
  """Figures out what kind of header 'include' is.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    used_angle_brackets: True if the #include used <> rather than "".
    include_order: "default" or other value allowed in program arguments

  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', True, "standardcfirst")
    _OTHER_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # Membership in the table of standard C++ headers.
  is_cpp_header = include in _CPP_HEADERS

  # Mark include as C header if in list or in a known folder for standard-ish
  # C headers.  Under the "default" order every remaining system header is
  # treated as a C header.
  if include_order == "default":
    is_std_c_header = True
  else:
    is_std_c_header = (
        include in _C_HEADERS
        # additional linux glibc header folders
        or Search(r'(?:%s)\/.*\.h' % "|".join(C_STANDARD_HEADER_FOLDERS), include))

  # Headers with C++ extensions shouldn't be considered C system headers
  extension = os.path.splitext(include)[1]
  is_system = used_angle_brackets and extension not in ['.hpp', '.hxx', '.h++']

  if is_system:
    if is_cpp_header:
      return _CPP_SYS_HEADER
    return _C_SYS_HEADER if is_std_c_header else _OTHER_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  target_dir, target_base = os.path.split(
      _DropCommonSuffixes(fileinfo.RepositoryName()))
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  public_dir = os.path.normpath(target_dir + '/../public').replace('\\', '/')
  if target_base == include_base and include_dir in (target_dir, public_dir):
    return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_first = _RE_FIRST_COMPONENT.match(target_base)
  include_first = _RE_FIRST_COMPONENT.match(include_base)
  if (target_first and include_first and
      target_first.group(0) == include_first.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER
def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Applies the checks that are specific to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier.  However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)
  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  # Only do this check if the included header follows google naming
  # conventions.  If not, assume that it's a 3rd party API that
  # requires special include conventions.
  #
  # We also make an exception for Lua headers, which follow google
  # naming convention but not the include convention.
  bare_header = Match(r'#include\s*"([^/]+\.h)"', line)
  if bare_header and not _THIRD_PARTY_HEADERS_PATTERN.match(bare_header.group(1)):
    error(filename, linenum, 'build/include_subdir', 4,
          'Include the directory when naming .h files')

  include_match = _RE_PATTERN_INCLUDE.search(line)
  if not include_match:
    return

  include = include_match.group(2)
  used_angle_brackets = (include_match.group(1) == '<')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  duplicate_line = include_state.FindHeader(include)
  if duplicate_line >= 0:
    error(filename, linenum, 'build/include', 4,
          '"%s" already included at %s:%s' %
          (include, filename, duplicate_line))
    return

  for source_ext in GetNonHeaderExtensions():
    if (include.endswith('.' + source_ext) and
        os.path.dirname(fileinfo.RepositoryName()) != os.path.dirname(include)):
      error(filename, linenum, 'build/include', 4,
            'Do not include .' + source_ext + ' files from other packages')
      return

  # We DO want to include a 3rd party looking header if it matches the
  # filename. Otherwise we get an erroneous error "...should include its
  # header" error later.
  stem = filename[0:len(filename) - len(fileinfo.Extension())]
  third_src_header = False
  for header_ext in GetHeaderExtensions():
    headername = FileInfo(stem + '.' + header_ext).RepositoryName()
    if headername in include or include in headername:
      third_src_header = True
      break

  if not third_src_header and _THIRD_PARTY_HEADERS_PATTERN.match(include):
    return

  include_state.include_list[-1].append((include, linenum))

  # We want to ensure that headers appear in the right order:
  # 1) for foo.cc, foo.h  (preferred location)
  # 2) c system files
  # 3) cpp system files
  # 4) for foo.cc, foo.h  (deprecated location)
  # 5) other google headers
  #
  # We classify each include statement as one of those 5 types
  # using a number of techniques. The include_state object keeps
  # track of the highest type seen, and complains if we see a
  # lower type after that.
  header_kind = _ClassifyInclude(fileinfo, include, used_angle_brackets,
                                 _include_order)
  error_message = include_state.CheckNextIncludeOrder(header_kind)
  if error_message:
    error(filename, linenum, 'build/include_order', 4,
          '%s. Should be: %s.h, c system, c++ system, other.' %
          (error_message, fileinfo.BaseName()))
  canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
  if not include_state.IsInAlphabeticalOrder(
      clean_lines, linenum, canonical_include):
    error(filename, linenum, 'build/include_alpha', 4,
          'Include "%s" not in alphabetical order' % include)
  include_state.SetLastHeader(canonical_include)
+ + Given a string of lines and a regular expression string, retrieve all the text + following the expression and between opening punctuation symbols like + (, [, or {, and the matching close-punctuation symbol. This properly nested + occurrences of the punctuations, so for the text like + printf(a(), b(c())); + a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'. + start_pattern must match string having an open punctuation symbol at the end. + + Args: + text: The lines to extract text. Its comments and strings must be elided. + It can be single line and can span multiple lines. + start_pattern: The regexp string indicating where to start extracting + the text. + Returns: + The extracted text. + None if either the opening string or ending punctuation could not be found. + """ + # TODO(unknown): Audit cpplint.py to see what places could be profitably + # rewritten to use _GetTextInside (and use inferior regexp matching today). + + # Give opening punctuations to get the matching close-punctuations. + matching_punctuation = {'(': ')', '{': '}', '[': ']'} + closing_punctuation = set(itervalues(matching_punctuation)) + + # Find the position to start extracting text. + match = re.search(start_pattern, text, re.M) + if not match: # start_pattern not found in text. + return None + start_position = match.end(0) + + assert start_position > 0, ( + 'start_pattern must ends with an opening punctuation.') + assert text[start_position - 1] in matching_punctuation, ( + 'start_pattern must ends with an opening punctuation.') + # Stack of closing punctuations we expect to have in text after position. + punctuation_stack = [matching_punctuation[text[start_position - 1]]] + position = start_position + while punctuation_stack and position < len(text): + if text[position] == punctuation_stack[-1]: + punctuation_stack.pop() + elif text[position] in closing_punctuation: + # A closing punctuation without matching opening punctuations. 
+ return None + elif text[position] in matching_punctuation: + punctuation_stack.append(matching_punctuation[text[position]]) + position += 1 + if punctuation_stack: + # Opening punctuations left without matching close-punctuations. + return None + # punctuations match. + return text[start_position:position - 1] + +# Patterns for matching call-by-reference parameters. +# +# Supports nested templates up to 2 levels deep using this messy pattern: +# < (?: < (?: < [^<>]* +# > +# | [^<>] )* +# > +# | [^<>] )* +# > +_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*' # =~ [[:alpha:]][[:alnum:]]* +_RE_PATTERN_TYPE = ( + r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?' + r'(?:\w|' + r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|' + r'::)+') +# A call-by-reference parameter ends with '& identifier'. +_RE_PATTERN_REF_PARAM = re.compile( + r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*' + r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]') +# A call-by-const-reference parameter either ends with 'const& identifier' +# or looks like 'const type& identifier' when 'type' is atomic. +_RE_PATTERN_CONST_REF_PARAM = ( + r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT + + r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')') +# Stream types. +_RE_PATTERN_REF_STREAM_PARAM = ( + r'(?:.*stream\s*&\s*' + _RE_PATTERN_IDENT + r')') + +def CheckLanguage(filename, clean_lines, linenum, file_extension, + include_state, nesting_state, error): + """Checks rules from the 'C++ language rules' section of cppguide.html. + + Some of these rules are hard to test (function overloading, using + uint32 inappropriately), but we do the best we can. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + file_extension: The extension (without the dot) of the filename. + include_state: An _IncludeState instance in which the headers are inserted. 
def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives.  This is meant
  # to silence warnings for conditional includes.
  match = Match(r'^\s*#\s*(if|ifdef|ifndef|elif|else|endif)\b', line)
  if match:
    include_state.ResetSection(match.group(1))

  # Perform other checks now that we are sure that this is not an include line
  CheckCasts(filename, clean_lines, linenum, error)
  CheckGlobalStatic(filename, clean_lines, linenum, error)
  CheckPrintf(filename, clean_lines, linenum, error)

  if IsHeaderExtension(file_extension):
    # TODO(unknown): check that 1-arg constructors are explicit.
    #                How to tell it's a constructor?
    #                (handled in CheckForNonStandardConstructs for now)
    # TODO(unknown): check that classes declare or disable copy/assign
    #                (level 1 error)
    pass

  # Check if people are using the verboten C basic types.  The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } }; // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous. Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(unknown): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  # The fill value is fine when it is a character literal (elided to ''),
  # a decimal integer or a hex integer.  The alternatives are grouped and
  # anchored at both ends so the whole argument has to be a literal; without
  # the group, "|" binds looser than "^" and "$" and only a prefix was tested.
  if match and not Match(r"^(?:''|-?[0-9]+|0x[0-9A-Fa-f]+)\s*$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    if Search(r'\bliterals\b', line):
      error(filename, linenum, 'build/namespaces_literals', 5,
            'Do not use namespace using-directives. '
            'Use using-declarations instead.')
    else:
      error(filename, linenum, 'build/namespaces', 5,
            'Do not use namespace using-directives. '
            'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.  ">>" is a delimiter in its own right; the pattern
    # must not require a trailing "]" after it.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # Check for use of unnamed namespaces in header files.  Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (IsHeaderExtension(file_extension)
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces_headers', 4,
          'Do not use unnamed namespaces in header files. See '
          'https://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')
def CheckGlobalStatic(filename, clean_lines, linenum, error):
  """Flags static/global string objects and self-initialised members.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  decl = clean_lines.elided[linenum]

  # Match two lines at a time to support multiline declarations
  has_next_line = linenum + 1 < clean_lines.NumLines()
  if has_next_line and not Search(r'[;({]', decl):
    decl += clean_lines.elided[linenum + 1].strip()

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access, and
  # also because globals can be destroyed when some threads are still running.
  # TODO(unknown): Generalize this to also find static unique_ptr instances.
  # TODO(unknown): File bugs for clang-tidy to find these.
  found = Match(
      r'((?:|static +)(?:|const +))(?::*std::)?string( +const)? +'
      r'([a-zA-Z0-9_:]+)\b(.*)',
      decl)

  if found:
    # Remove false positives:
    # - String pointers (as opposed to values).
    #    string *pointer
    #    const string *pointer
    #    string const *pointer
    #    string *const pointer
    #
    # - Functions and template specializations.
    #    string Function<Type>(...
    #    string Class<Type>::Method(...
    #
    # - Operators.  These are matched separately because operator names
    #   cross non-word boundaries, and trying to match both operators
    #   and functions at the same time would decrease accuracy of
    #   matching identifiers.
    #    string Class::operator*()
    is_false_positive = (
        Search(r'\bstring\b(\s+const)?\s*[\*\&]\s*(const\s+)?\w', decl) or
        Search(r'\boperator\W', decl) or
        Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)*\s*\(([^"]|$)', found.group(4)))
    if not is_false_positive:
      if Search(r'\bconst\b', decl):
        error(filename, linenum, 'runtime/string', 4,
              'For a static/global string constant, use a C style string '
              'instead: "%schar%s %s[]".' %
              (found.group(1), found.group(2) or '', found.group(3)))
      else:
        error(filename, linenum, 'runtime/string', 4,
              'Static/global string variables are not permitted.')

  self_init = Search(r'\b([A-Za-z0-9_]*_)\(\1\)', decl)
  if not self_init:
    self_init = Search(r'\b([A-Za-z0-9_]*_)\(CHECK_NOTNULL\(\1\)\)', decl)
  if self_init:
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')


def CheckPrintf(filename, clean_lines, linenum, error):
  """Flags risky uses of the printf family and of strcpy/strcat.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  code = clean_lines.elided[linenum]

  # When snprintf is used, the second argument shouldn't be a literal.
  sized_call = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', code)
  if sized_call:
    buffer_arg, size_arg = sized_call.group(1), sized_call.group(2)
    # If 2nd arg is zero, snprintf is used to calculate size.
    if size_arg != '0':
      error(filename, linenum, 'runtime/printf', 3,
            'If you can, use sizeof(%s) instead of %s as the 2nd arg '
            'to snprintf.' % (buffer_arg, size_arg))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\s*\(', code):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  unsafe_copy = Search(r'\b(strcpy|strcat)\s*\(', code)
  if unsafe_copy:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % unsafe_copy.group(1))
def IsDerivedFunction(clean_lines, linenum):
  """Check if current line contains an inherited function.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    A true value if the function starting on or shortly before this line
    carries the "override" virt-specifier, a false value otherwise.
  """
  # Scan back a few lines for start of current function
  oldest = max(-1, linenum - 10)
  for i in xrange(linenum, oldest, -1):
    start = Match(r'^([^()]*\w+)\(', clean_lines.elided[i])
    if not start:
      continue
    # Look for "override" after the matching closing parenthesis
    text, _, closing_paren = CloseExpression(
        clean_lines, i, len(start.group(1)))
    if closing_paren < 0:
      return False
    return Search(r'\boverride\b', text[closing_paren:])
  return False


def IsOutOfLineMethodDefinition(clean_lines, linenum):
  """Check if current line contains an out-of-line method definition.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if current line contains an out-of-line method definition.
  """
  # Scan back a few lines for start of current function
  oldest = max(-1, linenum - 10)
  for i in xrange(linenum, oldest, -1):
    candidate = clean_lines.elided[i]
    if Match(r'^([^()]*\w+)\(', candidate):
      qualified = Match(r'^[^()]*\w+::\w+\(', candidate)
      return qualified is not None
  return False
def IsInitializerList(clean_lines, linenum):
  """Check if current line is inside constructor initializer list.

  Args:
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
  Returns:
    True if current line appears to be inside constructor initializer
    list, False otherwise.
  """
  for i in xrange(linenum, 1, -1):
    text = clean_lines.elided[i]
    if i == linenum:
      # On the starting line, ignore a trailing '{' that opens a body.
      before_brace = Match(r'^(.*)\{\s*$', text)
      if before_brace:
        text = before_brace.group(1)

    # A lone colon tend to indicate the start of a constructor
    # initializer list.  It could also be a ternary operator, which
    # also tend to appear in constructor initializer lists as
    # opposed to parameter lists.
    starts_list = Search(r'\s:\s*\w+[({]', text)
    # A closing brace followed by a comma is probably the end of a
    # brace-initialized member in constructor initializer list.
    if starts_list or Search(r'\}\s*,\s*$', text):
      return True

    # Found one of the following:
    # - A closing brace or semicolon, probably the end of the previous
    #   function.
    # - An opening brace, probably the start of current class or namespace.
    #
    # Current line is probably not inside an initializer list since
    # we saw one of those things without seeing the starting colon.
    if Search(r'[{};]\s*$', text):
      return False

  # Got to the beginning of the file without seeing the start of
  # constructor initializer list.
  return False
def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Do nothing if there is no '&' on current line.
  line = clean_lines.elided[linenum]
  if '&' not in line:
    return

  # If a function is inherited, current function doesn't have much of
  # a choice, so any non-const references should not be blamed on
  # derived function.
  if IsDerivedFunction(clean_lines, linenum):
    return

  # Don't warn on out-of-line method definitions, as we would warn on the
  # in-line declaration, if it isn't marked with 'override'.
  if IsOutOfLineMethodDefinition(clean_lines, linenum):
    return

  # Long type names may be broken across multiple lines, usually in one
  # of these forms:
  #   LongType
  #       ::LongTypeContinued &identifier
  #   LongType::
  #       LongTypeContinued &identifier
  #   LongType<
  #       ...>::LongTypeContinued &identifier
  #
  # If we detected a type split across two lines, join the previous
  # line to current line so that we can match const references
  # accordingly.
  #
  # Note that this only scans back one line, since scanning back
  # arbitrary number of lines would be expensive.  If you have a type
  # that spans more than 2 lines, please use a typedef.
  if linenum > 1:
    previous = None
    if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line):
      # previous_line\n + ::current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$',
                        clean_lines.elided[linenum - 1])
    elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line):
      # previous_line::\n + current_line
      previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$',
                        clean_lines.elided[linenum - 1])
    if previous:
      line = previous.group(1) + line.lstrip()
    else:
      # Check for templated parameter that is split across multiple lines
      endpos = line.rfind('>')
      if endpos > -1:
        (_, startline, startpos) = ReverseCloseExpression(
            clean_lines, linenum, endpos)
        if startpos > -1 and startline < linenum:
          # Found the matching < on an earlier line, collect all
          # pieces up to current line.
          line = ''.join(clean_lines.elided[i].strip()
                         for i in xrange(startline, linenum + 1))

  # Check for non-const references in function parameters.  A single '&' may
  # found in the following places:
  #   inside expression: binary & for bitwise AND
  #   inside expression: unary & for taking the address of something
  #   inside declarators: reference parameter
  # We will exclude the first two cases by checking that we are not inside a
  # function body, including one that was just introduced by a trailing '{'.
  # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare].
  if (nesting_state.previous_stack_top and
      not isinstance(nesting_state.previous_stack_top,
                     (_ClassInfo, _NamespaceInfo))):
    # Not at toplevel, not within a class, and not within a namespace
    return

  # Avoid initializer lists.  We only need to scan back from the
  # current line for something that starts with ':'.
  #
  # We don't need to check the current line, since the '&' would
  # appear inside the second set of parentheses on the current line as
  # opposed to the first set.
  if linenum > 0:
    for i in xrange(linenum - 1, max(0, linenum - 10), -1):
      previous_line = clean_lines.elided[i]
      if not Search(r'[),]\s*$', previous_line):
        break
      if Match(r'^\s*:\s+\S', previous_line):
        return

  # Avoid preprocessors
  if Search(r'\\\s*$', line):
    return

  # Avoid constructor initializer lists
  if IsInitializerList(clean_lines, linenum):
    return

  # We allow non-const references in a few standard places, like functions
  # called "swap()" or iostream operators like "<<" or ">>".  Do not check
  # those function parameters.
  #
  # We also accept & in static_assert, which looks like a function but
  # it's actually a declaration expression.
  #
  # The optional template argument list after swap is "<" + word characters
  # and colons + ">", e.g. swap<ns::Type>(...).  It needs a character class;
  # written as "\w:+" it would only match one word character followed by
  # colons.
  allowed_functions = (r'(?:[sS]wap(?:<[\w:]+>)?|'
                       r'operator\s*[<>][<>]|'
                       r'static_assert|COMPILE_ASSERT'
                       r')\s*\(')
  if Search(allowed_functions, line):
    return
  elif not Search(r'\S+\([^)]*$', line):
    # Don't see an allowed function on this line.  Actually we
    # didn't see any function name on this line, so this is likely a
    # multi-line parameter list.  Try a bit harder to catch this case.
    for i in xrange(2):
      if (linenum > i and
          Search(allowed_functions, clean_lines.elided[linenum - i - 1])):
        return

  decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
  for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
    if (not Match(_RE_PATTERN_CONST_REF_PARAM, parameter) and
        not Match(_RE_PATTERN_REF_STREAM_PARAM, parameter)):
      error(filename, linenum, 'runtime/references', 2,
            'Is this a non-const reference? '
            'If so, make const or use a pointer: ' +
            ReplaceAll(' *<', '<', parameter))
False negative with less-than comparison is + # avoided because those operators are usually followed by a space. + # + # function // bracket + no space = false positive + # value < double(42) // bracket + space = true positive + matched_new_or_template = match.group(1) + + # Avoid arrays by looking for brackets that come after the closing + # parenthesis. + if Match(r'\([^()]+\)\s*\[', match.group(3)): + return + + # Other things to ignore: + # - Function pointers + # - Casts to pointer types + # - Placement new + # - Alias declarations + matched_funcptr = match.group(3) + if (matched_new_or_template is None and + not (matched_funcptr and + (Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(', + matched_funcptr) or + matched_funcptr.startswith('(*)'))) and + not Match(r'\s*using\s+\S+\s*=\s*' + matched_type, line) and + not Search(r'new\(\S+\)\s*' + matched_type, line)): + error(filename, linenum, 'readability/casting', 4, + 'Using deprecated casting style. ' + 'Use static_cast<%s>(...) instead' % + matched_type) + + if not expecting_function: + CheckCStyleCast(filename, clean_lines, linenum, 'static_cast', + r'\((int|float|double|bool|char|u?int(16|32|64))\)', error) + + # This doesn't catch all cases. Consider (const char * const)"hello". + # + # (char *) "foo" should always be a const_cast (reinterpret_cast won't + # compile). + if CheckCStyleCast(filename, clean_lines, linenum, 'const_cast', + r'\((char\s?\*+\s?)\)\s*"', error): + pass + else: + # Check pointer casts for other than string constants + CheckCStyleCast(filename, clean_lines, linenum, 'reinterpret_cast', + r'\((\w+\s?\*+\s?)\)', error) + + # In addition, we look for people taking the address of a cast. This + # is dangerous -- casts can assign to temporaries, so the pointer doesn't + # point where you think. + # + # Some non-identifier character is required before the '&' for the + # expression to be recognized as a cast. 
These are casts: + # expression = &static_cast(temporary()); + # function(&(int*)(temporary())); + # + # This is not a cast: + # reference_type&(int* function_param); + match = Search( + r'(?:[^\w]&\(([^)*][^)]*)\)[\w(])|' + r'(?:[^\w]&(static|dynamic|down|reinterpret)_cast\b)', line) + if match: + # Try a better error message when the & is bound to something + # dereferenced by the casted pointer, as opposed to the casted + # pointer itself. + parenthesis_error = False + match = Match(r'^(.*&(?:static|dynamic|down|reinterpret)_cast\b)<', line) + if match: + _, y1, x1 = CloseExpression(clean_lines, linenum, len(match.group(1))) + if x1 >= 0 and clean_lines.elided[y1][x1] == '(': + _, y2, x2 = CloseExpression(clean_lines, y1, x1) + if x2 >= 0: + extended_line = clean_lines.elided[y2][x2:] + if y2 < clean_lines.NumLines() - 1: + extended_line += clean_lines.elided[y2 + 1] + if Match(r'\s*(?:->|\[)', extended_line): + parenthesis_error = True + + if parenthesis_error: + error(filename, linenum, 'readability/casting', 4, + ('Are you taking an address of something dereferenced ' + 'from a cast? Wrapping the dereferenced expression in ' + 'parentheses will make the binding more obvious')) + else: + error(filename, linenum, 'runtime/casting', 4, + ('Are you taking an address of a cast? ' + 'This is dangerous: could be a temp var. ' + 'Take the address before doing the cast, rather than after')) + +def CheckCStyleCast(filename, clean_lines, linenum, cast_type, pattern, error): + """Checks for a C-style cast by looking for the pattern. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + cast_type: The string for the C++ cast to recommend. This is either + reinterpret_cast, static_cast, or const_cast, depending. + pattern: The regular expression used to find C-style casts. + error: The function to call with any errors found. + + Returns: + True if an error was emitted. 
+ False otherwise. + """ + line = clean_lines.elided[linenum] + match = Search(pattern, line) + if not match: + return False + + # Exclude lines with keywords that tend to look like casts + context = line[0:match.start(1) - 1] + if Match(r'.*\b(?:sizeof|alignof|alignas|[_A-Z][_A-Z0-9]*)\s*$', context): + return False + + # Try expanding current context to see if we one level of + # parentheses inside a macro. + if linenum > 0: + for i in xrange(linenum - 1, max(0, linenum - 5), -1): + context = clean_lines.elided[i] + context + if Match(r'.*\b[_A-Z][_A-Z0-9]*\s*\((?:\([^()]*\)|[^()])*$', context): + return False + + # operator++(int) and operator--(int) + if context.endswith(' operator++') or context.endswith(' operator--'): + return False + + # A single unnamed argument for a function tends to look like old style cast. + # If we see those, don't issue warnings for deprecated casts. + remainder = line[match.end(0):] + if Match(r'^\s*(?:;|const\b|throw\b|final\b|override\b|[=>{),]|->)', + remainder): + return False + + # At this point, all that should be left is actual casts. + error(filename, linenum, 'readability/casting', 4, + 'Using C-style cast. Use %s<%s>(...) instead' % + (cast_type, match.group(1))) + + return True + +def ExpectingFunctionArgs(clean_lines, linenum): + """Checks whether where function type arguments are expected. + + Args: + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + + Returns: + True if the line at 'linenum' is inside something that expects arguments + of function types. 
+ """ + line = clean_lines.elided[linenum] + return (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or + (linenum >= 2 and + (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$', + clean_lines.elided[linenum - 1]) or + Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$', + clean_lines.elided[linenum - 2]) or + Search(r'\bstd::m?function\s*\<\s*$', + clean_lines.elided[linenum - 1])))) + +_HEADERS_CONTAINING_TEMPLATES = ( + ('', ('deque',)), + ('', ('unary_function', 'binary_function', + 'plus', 'minus', 'multiplies', 'divides', 'modulus', + 'negate', + 'equal_to', 'not_equal_to', 'greater', 'less', + 'greater_equal', 'less_equal', + 'logical_and', 'logical_or', 'logical_not', + 'unary_negate', 'not1', 'binary_negate', 'not2', + 'bind1st', 'bind2nd', + 'pointer_to_unary_function', + 'pointer_to_binary_function', + 'ptr_fun', + 'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t', + 'mem_fun_ref_t', + 'const_mem_fun_t', 'const_mem_fun1_t', + 'const_mem_fun_ref_t', 'const_mem_fun1_ref_t', + 'mem_fun_ref', + )), + ('', ('numeric_limits',)), + ('', ('list',)), + ('', ('multimap',)), + ('', ('allocator', 'make_shared', 'make_unique', 'shared_ptr', + 'unique_ptr', 'weak_ptr')), + ('', ('queue', 'priority_queue',)), + ('', ('multiset',)), + ('', ('stack',)), + ('', ('char_traits', 'basic_string',)), + ('', ('tuple',)), + ('', ('unordered_map', 'unordered_multimap')), + ('', ('unordered_set', 'unordered_multiset')), + ('', ('pair',)), + ('', ('vector',)), + + # gcc extensions. 
+ # Note: std::hash is their hash, ::hash is our hash + ('', ('hash_map', 'hash_multimap',)), + ('', ('hash_set', 'hash_multiset',)), + ('', ('slist',)), + ) + +_HEADERS_MAYBE_TEMPLATES = ( + ('', ('copy', 'max', 'min', 'min_element', 'sort', + 'transform', + )), + ('', ('forward', 'make_pair', 'move', 'swap')), + ) + +_RE_PATTERN_STRING = re.compile(r'\bstring\b') + +_re_pattern_headers_maybe_templates = [] +for _header, _templates in _HEADERS_MAYBE_TEMPLATES: + for _template in _templates: + # Match max(..., ...), max(..., ...), but not foo->max, foo.max or + # 'type::max()'. + _re_pattern_headers_maybe_templates.append( + (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'), + _template, + _header)) +# Match set, but not foo->set, foo.set +_re_pattern_headers_maybe_templates.append( + (re.compile(r'[^>.]\bset\s*\<'), + 'set<>', + '')) +# Match 'map var' and 'std::map(...)', but not 'map(...)'' +_re_pattern_headers_maybe_templates.append( + (re.compile(r'(std\b::\bmap\s*\<)|(^(std\b::\b)map\b\(\s*\<)'), + 'map<>', + '')) + +# Other scripts may reach in and modify this pattern. +_re_pattern_templates = [] +for _header, _templates in _HEADERS_CONTAINING_TEMPLATES: + for _template in _templates: + _re_pattern_templates.append( + (re.compile(r'(\<|\b)' + _template + r'\s*\<'), + _template + '<>', + _header)) + +def FilesBelongToSameModule(filename_cc, filename_h): + """Check if these two filenames belong to the same module. + + The concept of a 'module' here is a as follows: + foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the + same 'module' if they are in the same directory. + some/path/public/xyzzy and some/path/internal/xyzzy are also considered + to belong to the same module here. + + If the filename_cc contains a longer path than the filename_h, for example, + '/absolute/path/to/base/sysinfo.cc', and this file would include + 'base/sysinfo.h', this function also produces the prefix needed to open the + header. 
This is used by the caller of this function to more robustly open the + header file. We don't have access to the real include paths in this context, + so we need this guesswork here. + + Known bugs: tools/base/bar.cc and base/bar.h belong to the same module + according to this implementation. Because of this, this function gives + some false positives. This should be sufficiently rare in practice. + + Args: + filename_cc: is the path for the source (e.g. .cc) file + filename_h: is the path for the header path + + Returns: + Tuple with a bool and a string: + bool: True if filename_cc and filename_h belong to the same module. + string: the additional prefix needed to open the header file. + """ + fileinfo_cc = FileInfo(filename_cc) + if not fileinfo_cc.Extension().lstrip('.') in GetNonHeaderExtensions(): + return (False, '') + + fileinfo_h = FileInfo(filename_h) + if not IsHeaderExtension(fileinfo_h.Extension().lstrip('.')): + return (False, '') + + filename_cc = filename_cc[:-(len(fileinfo_cc.Extension()))] + matched_test_suffix = Search(_TEST_FILE_SUFFIX, fileinfo_cc.BaseName()) + if matched_test_suffix: + filename_cc = filename_cc[:-len(matched_test_suffix.group(1))] + + filename_cc = filename_cc.replace('/public/', '/') + filename_cc = filename_cc.replace('/internal/', '/') + + filename_h = filename_h[:-(len(fileinfo_h.Extension()))] + if filename_h.endswith('-inl'): + filename_h = filename_h[:-len('-inl')] + filename_h = filename_h.replace('/public/', '/') + filename_h = filename_h.replace('/internal/', '/') + + files_belong_to_same_module = filename_cc.endswith(filename_h) + common_path = '' + if files_belong_to_same_module: + common_path = filename_cc[:-len(filename_h)] + return files_belong_to_same_module, common_path + +def UpdateIncludeState(filename, include_dict, io=codecs): + """Fill up the include_dict with new includes found from the file. + + Args: + filename: the name of the header to read. 
+ include_dict: a dictionary in which the headers are inserted. + io: The io factory to use to read the file. Provided for testability. + + Returns: + True if a header was successfully added. False otherwise. + """ + headerfile = None + try: + with io.open(filename, 'r', 'utf8', 'replace') as headerfile: + linenum = 0 + for line in headerfile: + linenum += 1 + clean_line = CleanseComments(line) + match = _RE_PATTERN_INCLUDE.search(clean_line) + if match: + include = match.group(2) + include_dict.setdefault(include, linenum) + return True + except IOError: + return False + +def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, + io=codecs): + """Reports for missing stl includes. + + This function will output warnings to make sure you are including the headers + necessary for the stl containers and functions that you use. We only give one + reason to include a header. For example, if you use both equal_to<> and + less<> in a .h file, only one (the latter in the file) of these will be + reported as a reason to include the . + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + include_state: An _IncludeState instance. + error: The function to call with any errors found. + io: The IO factory to use to read the header file. Provided for unittest + injection. + """ + required = {} # A map of header name to linenumber and the template entity. + # Example of required: { '': (1219, 'less<>') } + + for linenum in xrange(clean_lines.NumLines()): + line = clean_lines.elided[linenum] + if not line or line[0] == '#': + continue + + # String is special -- it is a non-templatized type in STL. + matched = _RE_PATTERN_STRING.search(line) + if matched: + # Don't warn about strings in non-STL namespaces: + # (We check only the first match per line; good enough.) 
+ prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[''] = (linenum, 'string') + + for pattern, template, header in _re_pattern_headers_maybe_templates: + if pattern.search(line): + required[header] = (linenum, template) + + # The following function is just a speed up, no semantics are changed. + if not '<' in line: # Reduces the cpu time usage by skipping lines. + continue + + for pattern, template, header in _re_pattern_templates: + matched = pattern.search(line) + if matched: + # Don't warn about IWYU in non-STL namespaces: + # (We check only the first match per line; good enough.) + prefix = line[:matched.start()] + if prefix.endswith('std::') or not prefix.endswith('::'): + required[header] = (linenum, template) + + # The policy is that if you #include something in foo.h you don't need to + # include it again in foo.cc. Here, we will look at possible includes. + # Let's flatten the include_state include_list and copy it into a dictionary. + include_dict = dict([item for sublist in include_state.include_list + for item in sublist]) + + # Did we find the header for this file (if any) and successfully load it? + header_found = False + + # Use the absolute path so that matching works properly. + abs_filename = FileInfo(filename).FullName() + + # For Emacs's flymake. + # If cpplint is invoked from Emacs's flymake, a temporary file is generated + # by flymake and that file name might end with '_flymake.cc'. In that case, + # restore original file name here so that the corresponding header file can be + # found. + # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h' + # instead of 'foo_flymake.h' + abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename) + + # include_dict is modified during iteration, so we iterate over a copy of + # the keys. 
+ header_keys = list(include_dict.keys()) + for header in header_keys: + (same_module, common_path) = FilesBelongToSameModule(abs_filename, header) + fullpath = common_path + header + if same_module and UpdateIncludeState(fullpath, include_dict, io): + header_found = True + + # If we can't find the header file for a .cc, assume it's because we don't + # know where to look. In that case we'll give up as we're not sure they + # didn't include it in the .h file. + # TODO(unknown): Do a better job of finding .h files so we are confident that + # not having the .h file means there isn't one. + if not header_found: + for extension in GetNonHeaderExtensions(): + if filename.endswith('.' + extension): + return + + # All the lines have been processed, report the errors found. + for required_header_unstripped in sorted(required, key=required.__getitem__): + template = required[required_header_unstripped][1] + if required_header_unstripped.strip('<>"') not in include_dict: + error(filename, required[required_header_unstripped][0], + 'build/include_what_you_use', 4, + 'Add #include ' + required_header_unstripped + ' for ' + template) + +_RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') + +def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): + """Check that make_pair's template arguments are deduced. + + G++ 4.6 in C++11 mode fails badly if make_pair's template arguments are + specified explicitly, and such use isn't intended in any case. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. 
+ """ + line = clean_lines.elided[linenum] + match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) + if match: + error(filename, linenum, 'build/explicit_make_pair', + 4, # 4 = high confidence + 'For C++11-compatibility, omit template arguments from make_pair' + ' OR use pair directly OR if appropriate, construct a pair directly') + +def CheckRedundantVirtual(filename, clean_lines, linenum, error): + """Check if line contains a redundant "virtual" function-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for "virtual" on current line. + line = clean_lines.elided[linenum] + virtual = Match(r'^(.*)(\bvirtual\b)(.*)$', line) + if not virtual: return + + # Ignore "virtual" keywords that are near access-specifiers. These + # are only used in class base-specifier and do not apply to member + # functions. + if (Search(r'\b(public|protected|private)\s+$', virtual.group(1)) or + Match(r'^\s+(public|protected|private)\b', virtual.group(3))): + return + + # Ignore the "virtual" keyword from virtual base classes. Usually + # there is a column on the same line in these cases (virtual base + # classes are rare in google3 because multiple inheritance is rare). + if Match(r'^.*[^:]:[^:].*$', line): return + + # Look for the next opening parenthesis. This is the start of the + # parameter list (possibly on the next line shortly after virtual). + # TODO(unknown): doesn't work if there are virtual functions with + # decltype() or other things that use parentheses, but csearch suggests + # that this is rare. 
+ end_col = -1 + end_line = -1 + start_col = len(virtual.group(2)) + for start_line in xrange(linenum, min(linenum + 3, clean_lines.NumLines())): + line = clean_lines.elided[start_line][start_col:] + parameter_list = Match(r'^([^(]*)\(', line) + if parameter_list: + # Match parentheses to find the end of the parameter list + (_, end_line, end_col) = CloseExpression( + clean_lines, start_line, start_col + len(parameter_list.group(1))) + break + start_col = 0 + + if end_col < 0: + return # Couldn't find end of parameter list, give up + + # Look for "override" or "final" after the parameter list + # (possibly on the next few lines). + for i in xrange(end_line, min(end_line + 3, clean_lines.NumLines())): + line = clean_lines.elided[i][end_col:] + match = Search(r'\b(override|final)\b', line) + if match: + error(filename, linenum, 'readability/inheritance', 4, + ('"virtual" is redundant since function is ' + 'already declared as "%s"' % match.group(1))) + + # Set end_col to check whole lines after we are done with the + # first line. + end_col = 0 + if Search(r'[^\w]\s*$', line): + break + +def CheckRedundantOverrideOrFinal(filename, clean_lines, linenum, error): + """Check if line contains a redundant "override" or "final" virt-specifier. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + # Look for closing parenthesis nearby. We need one to confirm where + # the declarator ends and where the virt-specifier starts to avoid + # false positives. 
+ line = clean_lines.elided[linenum] + declarator_end = line.rfind(')') + if declarator_end >= 0: + fragment = line[declarator_end:] + else: + if linenum > 1 and clean_lines.elided[linenum - 1].rfind(')') >= 0: + fragment = line + else: + return + + # Check that at most one of "override" or "final" is present, not both + if Search(r'\boverride\b', fragment) and Search(r'\bfinal\b', fragment): + error(filename, linenum, 'readability/inheritance', 4, + ('"override" is redundant since function is ' + 'already declared as "final"')) + +# Returns true if we are at a new block, and it is directly +# inside of a namespace. +def IsBlockInNameSpace(nesting_state, is_forward_declaration): + """Checks that the new block is directly in a namespace. + + Args: + nesting_state: The _NestingState object that contains info about our state. + is_forward_declaration: If the class is a forward declared class. + Returns: + Whether or not the new block is directly in a namespace. + """ + if is_forward_declaration: + return len(nesting_state.stack) >= 1 and ( + isinstance(nesting_state.stack[-1], _NamespaceInfo)) + + return (len(nesting_state.stack) > 1 and + nesting_state.stack[-1].check_namespace_indentation and + isinstance(nesting_state.stack[-2], _NamespaceInfo)) + +def ShouldCheckNamespaceIndentation(nesting_state, is_namespace_indent_item, + raw_lines_no_comments, linenum): + """This method determines if we should apply our namespace indentation check. + + Args: + nesting_state: The current nesting state. + is_namespace_indent_item: If we just put a new class on the stack, True. + If the top of the stack is not a class, or we did not recently + add the class, False. + raw_lines_no_comments: The lines without the comments. + linenum: The current line number we are processing. + + Returns: + True if we should apply our namespace indentation check. Currently, it + only works for classes and namespaces inside of a namespace. 
+ """ + + is_forward_declaration = IsForwardClassDeclaration(raw_lines_no_comments, + linenum) + + if not (is_namespace_indent_item or is_forward_declaration): + return False + + # If we are in a macro, we do not want to check the namespace indentation. + if IsMacroDefinition(raw_lines_no_comments, linenum): + return False + + return IsBlockInNameSpace(nesting_state, is_forward_declaration) + +# Call this method if the line is directly inside of a namespace. +# If the line above is blank (excluding comments) or the start of +# an inner namespace, it cannot be indented. +def CheckItemIndentationInNamespace(filename, raw_lines_no_comments, linenum, + error): + line = raw_lines_no_comments[linenum] + if Match(r'^\s+', line): + error(filename, linenum, 'runtime/indentation_namespace', 4, + 'Do not indent within a namespace') + +def ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions=None): + """Processes a single line in the file. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + clean_lines: An array of strings, each representing a line of the file, + with comments stripped. + line: Number of line being processed. + include_state: An _IncludeState instance in which the headers are inserted. + function_state: A _FunctionState instance which counts function lines, etc. + nesting_state: A NestingState instance which maintains information about + the current stack of nested blocks being parsed. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. 
Each function takes 4 + arguments: filename, clean_lines, line, error + """ + raw_lines = clean_lines.raw_lines + ParseNolintSuppressions(filename, raw_lines[line], line, error) + nesting_state.Update(filename, clean_lines, line, error) + CheckForNamespaceIndentation(filename, nesting_state, clean_lines, line, + error) + if nesting_state.InAsmBlock(): return + CheckForFunctionLengths(filename, clean_lines, line, function_state, error) + CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) + CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) + CheckLanguage(filename, clean_lines, line, file_extension, include_state, + nesting_state, error) + CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) + CheckForNonStandardConstructs(filename, clean_lines, line, + nesting_state, error) + CheckVlogArguments(filename, clean_lines, line, error) + CheckPosixThreading(filename, clean_lines, line, error) + CheckInvalidIncrement(filename, clean_lines, line, error) + CheckMakePairUsesDeduction(filename, clean_lines, line, error) + CheckRedundantVirtual(filename, clean_lines, line, error) + CheckRedundantOverrideOrFinal(filename, clean_lines, line, error) + if extra_check_functions: + for check_fn in extra_check_functions: + check_fn(filename, clean_lines, line, error) + +def FlagCxx11Features(filename, clean_lines, linenum, error): + """Flag those c++11 features that we only allow in certain places. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++ TR1 headers. 
+ if include and include.group(1).startswith('tr1/'): + error(filename, linenum, 'build/c++tr1', 5, + ('C++ TR1 headers such as <%s> are unapproved.') % include.group(1)) + + # Flag unapproved C++11 headers. + if include and include.group(1) in ('cfenv', + 'condition_variable', + 'fenv.h', + 'future', + 'mutex', + 'thread', + 'chrono', + 'ratio', + 'regex', + 'system_error', + ): + error(filename, linenum, 'build/c++11', 5, + ('<%s> is an unapproved C++11 header.') % include.group(1)) + + # The only place where we need to worry about C++11 keywords and library + # features in preprocessor directives is in macro definitions. + if Match(r'\s*#', line) and not Match(r'\s*#\s*define\b', line): return + + # These are classes and free functions. The classes are always + # mentioned as std::*, but we only catch the free functions if + # they're not found by ADL. They're alphabetical by header. + for top_name in ( + # type_traits + 'alignment_of', + 'aligned_union', + ): + if Search(r'\bstd::%s\b' % top_name, line): + error(filename, linenum, 'build/c++11', 5, + ('std::%s is an unapproved C++11 class or function. Send c-style ' + 'an example of where it would make your code more readable, and ' + 'they may let you use it.') % top_name) + +def FlagCxx14Features(filename, clean_lines, linenum, error): + """Flag those C++14 features that we restrict. + + Args: + filename: The name of the current file. + clean_lines: A CleansedLines instance containing the file. + linenum: The number of the line to check. + error: The function to call with any errors found. + """ + line = clean_lines.elided[linenum] + + include = Match(r'\s*#\s*include\s+[<"]([^<"]+)[">]', line) + + # Flag unapproved C++14 headers. 
+ if include and include.group(1) in ('scoped_allocator', 'shared_mutex'): + error(filename, linenum, 'build/c++14', 5, + ('<%s> is an unapproved C++14 header.') % include.group(1)) + +def ProcessFileData(filename, file_extension, lines, error, + extra_check_functions=None): + """Performs lint checks and reports any errors to the given error function. + + Args: + filename: Filename of the file that is being processed. + file_extension: The extension (dot not included) of the file. + lines: An array of strings, each representing a line of the file, with the + last element being empty if the file is terminated with a newline. + error: A callable to which errors are reported, which takes 4 arguments: + filename, line number, error level, and message + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + lines = (['// marker so line numbers and indices both start at 1'] + lines + + ['// marker so line numbers end in a known way']) + + include_state = _IncludeState() + function_state = _FunctionState() + nesting_state = NestingState() + + ResetNolintSuppressions() + + CheckForCopyright(filename, lines, error) + ProcessGlobalSuppresions(lines) + RemoveMultiLineComments(filename, lines, error) + clean_lines = CleansedLines(lines) + + if IsHeaderExtension(file_extension): + CheckForHeaderGuard(filename, clean_lines, error) + + for line in xrange(clean_lines.NumLines()): + ProcessLine(filename, file_extension, clean_lines, line, + include_state, function_state, nesting_state, error, + extra_check_functions) + FlagCxx11Features(filename, clean_lines, line, error) + nesting_state.CheckCompletedBlocks(filename, error) + + CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) + + # Check that the .cc file has included its header if it exists. 
+ if _IsSourceExtension(file_extension): + CheckHeaderFileIncluded(filename, include_state, error) + + # We check here rather than inside ProcessLine so that we see raw + # lines rather than "cleaned" lines. + CheckForBadCharacters(filename, lines, error) + + CheckForNewlineAtEOF(filename, lines, error) + +def ProcessConfigOverrides(filename): + """ Loads the configuration files and processes the config overrides. + + Args: + filename: The name of the file being processed by the linter. + + Returns: + False if the current |filename| should not be processed further. + """ + + abs_filename = os.path.abspath(filename) + cfg_filters = [] + keep_looking = True + while keep_looking: + abs_path, base_name = os.path.split(abs_filename) + if not base_name: + break # Reached the root directory. + + cfg_file = os.path.join(abs_path, "CPPLINT.cfg") + abs_filename = abs_path + if not os.path.isfile(cfg_file): + continue + + try: + with open(cfg_file) as file_handle: + for line in file_handle: + line, _, _ = line.partition('#') # Remove comments. + if not line.strip(): + continue + + name, _, val = line.partition('=') + name = name.strip() + val = val.strip() + if name == 'set noparent': + keep_looking = False + elif name == 'filter': + cfg_filters.append(val) + elif name == 'exclude_files': + # When matching exclude_files pattern, use the base_name of + # the current file name or the directory name we are processing. + # For example, if we are checking for lint errors in /foo/bar/baz.cc + # and we found the .cfg file at /foo/CPPLINT.cfg, then the config + # file's "exclude_files" filter is meant to be checked against "bar" + # and not "baz" nor "bar/baz.cc". + if base_name: + pattern = re.compile(val) + if pattern.match(base_name): + if _cpplint_state.quiet: + # Suppress "Ignoring file" warning when using --quiet. + return False + _cpplint_state.PrintInfo('Ignoring "%s": file excluded by "%s". 
' + 'File path component "%s" matches ' + 'pattern "%s"\n' % + (filename, cfg_file, base_name, val)) + return False + elif name == 'linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + _cpplint_state.PrintError('Line length must be numeric.') + elif name == 'extensions': + ProcessExtensionsOption(val) + elif name == 'root': + global _root + # root directories are specified relative to CPPLINT.cfg dir. + _root = os.path.join(os.path.dirname(cfg_file), val) + elif name == 'headers': + ProcessHppHeadersOption(val) + elif name == 'includeorder': + ProcessIncludeOrderOption(val) + else: + _cpplint_state.PrintError( + 'Invalid configuration option (%s) in file %s\n' % + (name, cfg_file)) + + except IOError: + _cpplint_state.PrintError( + "Skipping config file '%s': Can't open for reading\n" % cfg_file) + keep_looking = False + + # Apply all the accumulated filters in reverse order (top-level directory + # config options having the least priority). + for cfg_filter in reversed(cfg_filters): + _AddFilters(cfg_filter) + + return True + +def ProcessFile(filename, vlevel, extra_check_functions=None): + """Does google-lint on a single file. + + Args: + filename: The name of the file to parse. + + vlevel: The level of errors to report. Every error of confidence + >= verbose_level will be reported. 0 is a good default. + + extra_check_functions: An array of additional check functions that will be + run on each source line. Each function takes 4 + arguments: filename, clean_lines, line, error + """ + + _SetVerboseLevel(vlevel) + _BackupFilters() + old_errors = _cpplint_state.error_count + + if not ProcessConfigOverrides(filename): + _RestoreFilters() + return + + lf_lines = [] + crlf_lines = [] + try: + # Support the UNIX convention of using "-" for stdin. 
Note that + # we are not opening the file with universal newline support + # (which codecs doesn't support anyway), so the resulting lines do + # contain trailing '\r' characters if we are reading a file that + # has CRLF endings. + # If after the split a trailing '\r' is present, it is removed + # below. + if filename == '-': + lines = codecs.StreamReaderWriter(sys.stdin, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace').read().split('\n') + else: + with codecs.open(filename, 'r', 'utf8', 'replace') as target_file: + lines = target_file.read().split('\n') + + # Remove trailing '\r'. + # The -1 accounts for the extra trailing blank line we get from split() + for linenum in range(len(lines) - 1): + if lines[linenum].endswith('\r'): + lines[linenum] = lines[linenum].rstrip('\r') + crlf_lines.append(linenum + 1) + else: + lf_lines.append(linenum + 1) + + except IOError: + _cpplint_state.PrintError( + "Skipping input '%s': Can't open for reading\n" % filename) + _RestoreFilters() + return + + # Note, if no dot is found, this will give the entire filename as the ext. + file_extension = filename[filename.rfind('.') + 1:] + + # When reading from stdin, the extension is unknown, so no cpplint tests + # should rely on the extension. + if filename != '-' and file_extension not in GetAllExtensions(): + _cpplint_state.PrintError('Ignoring %s; not a valid file name ' + '(%s)\n' % (filename, ', '.join(GetAllExtensions()))) + else: + ProcessFileData(filename, file_extension, lines, Error, + extra_check_functions) + + # If end-of-line sequences are a mix of LF and CR-LF, issue + # warnings on the lines with CR. + # + # Don't issue any warnings if all lines are uniformly LF or CR-LF, + # since critique can handle these just fine, and the style guide + # doesn't dictate a particular end of line sequence. 
+ # + # We can't depend on os.linesep to determine what the desired + # end-of-line sequence should be, since that will return the + # server-side end-of-line sequence. + if lf_lines and crlf_lines: + # Warn on every line with CR. An alternative approach might be to + # check whether the file is mostly CRLF or just LF, and warn on the + # minority, we bias toward LF here since most tools prefer LF. + for linenum in crlf_lines: + Error(filename, linenum, 'whitespace/newline', 1, + 'Unexpected \\r (^M) found; better to use only \\n') + + # Suppress printing anything if --quiet was passed unless the error + # count has increased after processing this file. + if not _cpplint_state.quiet or old_errors != _cpplint_state.error_count: + _cpplint_state.PrintInfo('Done processing %s\n' % filename) + _RestoreFilters() + +def PrintUsage(message): + """Prints a brief usage string and exits, optionally with an error message. + + Args: + message: The optional error message. + """ + sys.stderr.write(_USAGE % (list(GetAllExtensions()), + ','.join(list(GetAllExtensions())), + GetHeaderExtensions(), + ','.join(GetHeaderExtensions()))) + + if message: + sys.exit('\nFATAL ERROR: ' + message) + else: + sys.exit(0) + +def PrintVersion(): + sys.stdout.write('Cpplint fork (https://github.com/cpplint/cpplint)\n') + sys.stdout.write('cpplint ' + __VERSION__ + '\n') + sys.stdout.write('Python ' + sys.version + '\n') + sys.exit(0) + +def PrintCategories(): + """Prints a list of all the error-categories used by error messages. + + These are the categories used to filter messages via --filter. + """ + sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) + sys.exit(0) + +def ParseArguments(args): + """Parses the command line arguments. + + This may set the output format and verbosity level as side-effects. + + Args: + args: The command line arguments: + + Returns: + The list of filenames to lint. 
+ """ + try: + (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', + 'v=', + 'version', + 'counting=', + 'filter=', + 'root=', + 'repository=', + 'linelength=', + 'extensions=', + 'exclude=', + 'recursive', + 'headers=', + 'includeorder=', + 'quiet']) + except getopt.GetoptError: + PrintUsage('Invalid arguments.') + + verbosity = _VerboseLevel() + output_format = _OutputFormat() + filters = '' + quiet = _Quiet() + counting_style = '' + recursive = False + + for (opt, val) in opts: + if opt == '--help': + PrintUsage(None) + if opt == '--version': + PrintVersion() + elif opt == '--output': + if val not in ('emacs', 'vs7', 'eclipse', 'junit'): + PrintUsage('The only allowed output formats are emacs, vs7, eclipse ' + 'and junit.') + output_format = val + elif opt == '--quiet': + quiet = True + elif opt == '--verbose' or opt == '--v': + verbosity = int(val) + elif opt == '--filter': + filters = val + if not filters: + PrintCategories() + elif opt == '--counting': + if val not in ('total', 'toplevel', 'detailed'): + PrintUsage('Valid counting options are total, toplevel, and detailed') + counting_style = val + elif opt == '--root': + global _root + _root = val + elif opt == '--repository': + global _repository + _repository = val + elif opt == '--linelength': + global _line_length + try: + _line_length = int(val) + except ValueError: + PrintUsage('Line length must be digits.') + elif opt == '--exclude': + global _excludes + if not _excludes: + _excludes = set() + _excludes.update(glob.glob(val)) + elif opt == '--extensions': + ProcessExtensionsOption(val) + elif opt == '--headers': + ProcessHppHeadersOption(val) + elif opt == '--recursive': + recursive = True + elif opt == '--includeorder': + ProcessIncludeOrderOption(val) + + if not filenames: + PrintUsage('No files were specified.') + + if recursive: + filenames = _ExpandDirectories(filenames) + + if _excludes: + filenames = _FilterExcludedFiles(filenames) + + _SetOutputFormat(output_format) + 
_SetQuiet(quiet) + _SetVerboseLevel(verbosity) + _SetFilters(filters) + _SetCountingStyle(counting_style) + + filenames.sort() + return filenames + +def _ExpandDirectories(filenames): + """Searches a list of filenames and replaces directories in the list with + all files descending from those directories. Files with extensions not in + the valid extensions list are excluded. + + Args: + filenames: A list of files or directories + + Returns: + A list of all files that are members of filenames or descended from a + directory in filenames + """ + expanded = set() + for filename in filenames: + if not os.path.isdir(filename): + expanded.add(filename) + continue + + for root, _, files in os.walk(filename): + for loopfile in files: + fullname = os.path.join(root, loopfile) + if fullname.startswith('.' + os.path.sep): + fullname = fullname[len('.' + os.path.sep):] + expanded.add(fullname) + + filtered = [] + for filename in expanded: + if os.path.splitext(filename)[1][1:] in GetAllExtensions(): + filtered.append(filename) + return filtered + +def _FilterExcludedFiles(fnames): + """Filters out files listed in the --exclude command line switch. File paths + in the switch are evaluated relative to the current working directory + """ + exclude_paths = [os.path.abspath(f) for f in _excludes] + # because globbing does not work recursively, exclude all subpath of all excluded entries + return [f for f in fnames + if not any(e for e in exclude_paths + if _IsParentOrSame(e, os.path.abspath(f)))] + +def _IsParentOrSame(parent, child): + """Return true if child is subdirectory of parent. + Assumes both paths are absolute and don't contain symlinks. 
+ """ + parent = os.path.normpath(parent) + child = os.path.normpath(child) + if parent == child: + return True + + prefix = os.path.commonprefix([parent, child]) + if prefix != parent: + return False + # Note: os.path.commonprefix operates on character basis, so + # take extra care of situations like '/foo/ba' and '/foo/bar/baz' + child_suffix = child[len(prefix):] + child_suffix = child_suffix.lstrip(os.sep) + return child == os.path.join(prefix, child_suffix) + +def main(): + filenames = ParseArguments(sys.argv[1:]) + backup_err = sys.stderr + try: + # Change stderr to write with replacement characters so we don't die + # if we try to print something containing non-ASCII characters. + sys.stderr = codecs.StreamReader(sys.stderr, 'replace') + + _cpplint_state.ResetErrorCounts() + for filename in filenames: + ProcessFile(filename, _cpplint_state.verbose_level) + # If --quiet is passed, suppress printing error count unless there are errors. + if not _cpplint_state.quiet or _cpplint_state.error_count > 0: + _cpplint_state.PrintErrorCounts() + + if _cpplint_state.output_format == 'junit': + sys.stderr.write(_cpplint_state.FormatJUnitXML()) + + finally: + sys.stderr = backup_err + + sys.exit(_cpplint_state.error_count > 0) + +if __name__ == '__main__': + main() diff --git a/tools/image.mk b/tools/image.mk new file mode 100644 index 0000000..5a1acea --- /dev/null +++ b/tools/image.mk @@ -0,0 +1,96 @@ +# Generates a bootable ISO image that can be transferred to external media, such as CDs or USB sticks. +# This will install, in addition to your kernel, the bootloader GRUB (https://www.gnu.org/software/grub/). +# +# The target 'qemu-iso' is used to test the image generated with 'iso'. +# +# Assuming that a USB mass-storage device is connected as, for instance, /dev/sdc, the target 'usb-sdc' +# can be used to make your device bootable (requires root access, substitute sdc with the matching device). +# Alternatively, you can burn the .iso file directly to CD. 
+ +DD = dd +XORRISO = xorriso +MKISO = grub-mkrescue + +ISODIR = $(BUILDDIR)-iso +ISOGRUBCFG = boot/grub/grub.cfg +ISOKERNEL = boot/kernel +ISOINITRD = initrd +GRUBTITLE = $(shell id -un)s $(PROJECT) +GRUBTIMEOUT = 2 +GRUBBIN = /usr/lib/grub/i386-pc + +# Default ISO target +iso: $(ISOFILE) + +# Create Grub config +$(ISODIR)/$(ISOGRUBCFG): + @echo "GEN $@" + @mkdir -p $(dir $@) + @$(echo) "set timeout=$(GRUBTIMEOUT)\nset default=0\n\nmenuentry \"$(GRUBTITLE)\" {\n\tmultiboot /$(ISOKERNEL)\n\tmodule /$(ISOINITRD)\n\tboot\n}" > $@ + +# Strip debug symbols from kernel binary +$(ISODIR)/$(ISOKERNEL): all + @echo "STRIP $@" + @mkdir -p $(dir $@) + $(VERBOSE) $(STRIP) --strip-debug --strip-unneeded -p -o $@ $(KERNEL) + +# copy initial ramdisk +$(ISODIR)/$(ISOINITRD): all + @echo "CPY $@" + @mkdir -p $(dir $@) + @if [ -s $(INITRD) ] ; then cp -a $(INITRD) $@ ; else touch $@ ; fi + +# Pack to ISO +$(ISOFILE): $(ISODIR)/$(ISOKERNEL) $(ISODIR)/$(ISOINITRD) $(ISODIR)/$(ISOGRUBCFG) + @echo "ISO $@" + @which $(XORRISO) >/dev/null || echo "Xorriso cannot be found - if building the ISO fails, this may be the reason!" 
>&2 + $(VERBOSE) $(MKISO) -d $(GRUBBIN) -o $@ $(ISODIR) + +# Run ISO in QEMU/KVM +%-iso: $(ISOFILE) + @${MAKE} -s QEMUKERNEL="-cdrom $<" $* + +# Copy ISO to USB device +usb: $(ISOFILE) +ifeq (,$(USBDEV)) + @echo "The environment variable USBDEV must contain the path to the USB mass-storage device:" >&2 + @lsblk -o TYPE,KNAME,SIZE,MODEL -a -p | grep "^disk" | cut -b 6- + @exit 1 +else + $(VERBOSE) $(DD) if=$< of=$(USBDEV) bs=4M status=progress && sync +endif + +# Shorthand to copy ISO to a specific USB device +usb-%: + @$(MAKE) USBDEV=/dev/$* usb + +# Burn ISO to CD +cd: $(ISOFILE) +ifeq (,$(CDRWDEV)) + @echo "The environment variable CDRWDEV must contain the path to the CD/DVD writer" >&2 + @exit 1 +else + $(VERBOSE) $(XORRISO) -as cdrecord -v dev=$(CDRWDEV) -dao $< +endif + +# Shorthand to burn ISO to a specific CD device +cd-%: + @$(MAKE) CDRWDEV=/dev/$* cd + +# The standard target 'clean' removes the whole generated system, the object files, and the dependency files. +clean:: + @echo "RM $(ISODIR)" + $(VERBOSE) rm -rf "$(ISODIR)" "$(ISODIR)$(OPTTAG)" "$(ISODIR)$(NOOPTTAG)" "$(ISODIR)$(DBGTAG)" "$(ISODIR)$(VERBOSETAG)" + +# Documentation +help:: + @$(echo) "Bootable Images\n" \ + " \e[3miso\e[0m Generates a bootable system image (File: $(ISOFILE))\n\n" \ + " \e[3m*-iso\e[0m Simulate the system by booting from the virtual CD drive. (e.g. 
qemu-iso)\n\n" \ + " \e[3musb\e[0m Generates a bootable USB mass-storage device; the environment\n" \ + " variable \e[4mUSBDEV\e[0m should point to the USB device\n\n" \ + " \e[3mcd\e[0m Generates a bootable CD; the environment variable \e[4mCDRWDEV\e[0m\n" \ + " should point to the CD writer\n\n" + +# Phony targets +.PHONY: iso cd usb help diff --git a/tools/linter.mk b/tools/linter.mk new file mode 100644 index 0000000..e8898cb --- /dev/null +++ b/tools/linter.mk @@ -0,0 +1,30 @@ +# Perform static code checks + +TIDY ?= clang-tidy +CPPLINT ?= /usr/bin/env python "$(CURRENT_DIR)/cpplint.py" + +# Check sources with Clang Tidy +tidy:: +ifeq (,$(CC_SOURCES)) + @echo "(nothing to tidy)" +else + $(VERBOSE) $(TIDY) --format-style=google -header-filter=.* -warnings-as-errors="readability*" -checks="readability*,google-readability-casting,google-explicit-constructor,bugprone*,-bugprone-narrowing-conversions,-bugprone-reserved-identifier,-readability-else-after-return,-readability-magic-numbers" $(filter-out utils/png.cc,$(CC_SOURCES)) -- $(CXXFLAGS_ARCH) $(CXXFLAGS_DEFAULT) $(CXXFLAGS_OPT) +endif + +# Check sources with cpplint +lint:: + @if $(CPPLINT) --quiet --recursive . ; then \ + echo "Congratuations, coding style obeyed!" 
; \ + else \ + echo "Coding style violated -- see CPPLINT.cfg for details" ; \ + exit 1 ; \ + fi + +# Documentation +help:: + @$(echo) "Static Analysis and Linter\n" \ + " \e[3mlint\e[0m Checks the coding style using \e[4mCPPLINT\e[0m\n\n" \ + " \e[3mtidy\e[0m Uses \e[4mClang Tidy\e[0m for a static code analysis\n\n" + +# Phony targets +.PHONY: tidy lint help diff --git a/tools/qemu.mk b/tools/qemu.mk new file mode 100644 index 0000000..284f6c4 --- /dev/null +++ b/tools/qemu.mk @@ -0,0 +1,111 @@ +# Targets for running and debugging in Qemu/KVM + +QEMUCPUS ?= 4 +INITRD ?= /dev/null + +# Switch to curses if no graphical output is available +ifeq ($(DISPLAY),) + export QEMUDISPLAY ?= curses +else +# Macos display + ifeq ($(shell uname),Darwin) + export QEMUDISPLAY ?= cocoa + else + export QEMUDISPLAY ?= gtk + endif +endif + +export QEMUEXTRAFLAGS ?= + +# Architecture Specific flags +QEMUFLAGS += -k en-us -machine pcspk-audiodev=pa -d guest_errors -m 2048 +# Macos sound +ifeq ($(shell uname),Darwin) + QEMUFLAGS += -audiodev coreaudio,id=pa +else + QEMUFLAGS += -audiodev pa,id=pa +endif +DBGKERNEL ?= $(KERNEL64) +DBGARCH ?= i386:x86-64 +QEMU ?= qemu-system-x86_64 +QEMUKERNEL ?= -kernel $(KERNEL) -initrd $(INITRD) +KVMFLAGS = -enable-kvm -cpu host +QEMUDBGFLAGS = -no-shutdown -no-reboot + +GDB = $(PREFIX)gdb +ifneq ($(XDG_RUNTIME_DIR),) + # We should prefer using domain sockets in a private directory + GDBPORT ?= $(XDG_RUNTIME_DIR)/stubs-gdb.sock +else + # but fall back to UID + 1024, which should be an unprivileged unique port on single- and multiuser systems + GDBPORT ?= :$(shell echo $$(($$(id -u) + 1024))) +endif +ifneq ($(findstring /,$(GDBPORT)),) + QEMUGDB = -chardev socket,path=${GDBPORT},server=on,wait=off,id=gdb0 -gdb chardev:gdb0 +else + QEMUGDB = -gdb tcp:${GDBPORT} +endif +# (gdb itself supports either :port or a path as target) + +qemu: all + @echo "QEMU ${KERNEL}" + ${VERBOSE} $(QEMU) $(QEMUKERNEL) $(QEMUGDB) -display ${QEMUDISPLAY} -smp $(QEMUCPUS) 
$(QEMUFLAGS) ${QEMUEXTRAFLAGS} + +# Runs StuBS in Qemu with hardware acceleration (KVM support) enabled +# The started emulator provides several virtual CPUs that execute in parallel. +kvm: all + @echo "KVM ${KERNEL}" + ${VERBOSE} ${QEMU} $(QEMUKERNEL) $(QEMUGDB) -display ${QEMUDISPLAY} -smp $(QEMUCPUS) $(QEMUFLAGS) ${QEMUEXTRAFLAGS} $(KVMFLAGS) + +# Execute Qemu with activated GDB stub and directly connect GDB to the spawned Qemu. +gdb: all + ${VERBOSE} $(GDB) "$(DBGKERNEL)" \ + -ex "set arch $(DBGARCH)" \ + -ex "target remote | exec $(QEMU) -gdb stdio $(QEMUKERNEL) -smp $(QEMUCPUS) -S $(QEMUFLAGS) $(DBGFLAGS)" + +################################################################ +# Rekursive Targets: Setzen einzelner QEMU Parameter +################################################################ + +################################################################ +# Debugging mit GDB +# Sinnvoll anwendbar mit den Targets: qemu, kvm, *-curses, *-serial +%-gdb: + ${VERBOSE} ${MAKE} QEMUEXTRAFLAGS="${QEMUEXTRAFLAGS} -S" $* + +# um sich mit GDB auf dem Standardport zu verbinden +connect-gdb: + ${VERBOSE} gdb -ex "target remote ${GDBPORT}" $(DBGKERNEL) + +################################################################ +# Eine -display Variante erzwingen +%-curses: + ${VERBOSE} ${MAKE} QEMUDISPLAY="curses" QEMUSERIAL="vc:80Cx24C" $* + +%-x11: + ${VERBOSE} ${MAKE} QEMUDISPLAY=gtk $* + +################################################################ +# Serial-Ausgabe +QEMUSERIAL ?= vc:80Cx24C +QEMUFLAGS += -serial $(QEMUSERIAL) +%-serial: + @echo "HINT: C-a x: Terminate QEMU; C-a c: Open QEMU console" + ${VERBOSE} ${MAKE} QEMUSERIAL=mon:stdio $* + +# Help for Qemu targets +help:: + @$(echo) "System Emulation\n" \ + " \e[3mqemu\e[0m Starts $(PROJECT) in QEMU\n" \ + " Due to the internal design of QEMU, some things (especially\n" \ + " race conditions) might behave different compared to hardware!\n\n" \ + " \e[3mkvm\e[0m Starts $(PROJECT) in KVM, a 
hardware-accelerated virtual machine\n\n" \ + " \e[3m*-serial\e[0m Redirect the serial console to stdout (e.g., qemu-serial, kvm-serial)\n\n" \ + " \e[3m*-curses\e[0m Use QEMU's curses interface (e.g., qemu-curses).\n"\ + " \e[3m*-x11\e[0m Use QEMU's GTK interface (e.g., qemu-x11).\n\n"\ + " \e[3m*-gdb\e[0m Start Simulator with internal GDB stub and wait for a GDB\n" \ + " to attach. (e.g., qemu-gdb)\n" \ + " \e[3mconnect-gdb\e[0m Connect to waiting GDB. Execute in a second terminal!\n\n"\ + +# Phony targets +.PHONY: qemu kvm help diff --git a/tools/remote.mk b/tools/remote.mk new file mode 100644 index 0000000..0733bd1 --- /dev/null +++ b/tools/remote.mk @@ -0,0 +1,38 @@ +# Test your system on real hardware + +NETBOOT_LOCAL="/ibr/adm/user-boot/" +NETBOOT_HOST="x1.ibr.cs.tu-bs.de" + +# The boot menu shows pairs of `vmlinuz-*` + `initrd-*.img` with owning user and timestamp. +# We just need to choose a name that doesn't overlap with another user's. +netboot: all + $(VERBOSE) \ + if [ ! "$$DEPLOY_LOCAL" ] && [ -e "${NETBOOT_LOCAL}" ] ; then DEPLOY_LOCAL=1 ; fi ; \ + if [ "$$DEPLOY_LOCAL" = 1 ] ; then \ + user=$$USER ; \ + else \ + user=$$( ssh -G ${NETBOOT_HOST} | grep -oPe '^user \K.*' ) ; \ + fi ; \ + \ + mkdir -p $(BUILDDIR)/netboot ; \ + ln -fT ${KERNEL} $(BUILDDIR)/netboot/vmlinuz-$$user ; \ + initrd="$(INITRD)" ; if [ -s "$$initrd" ] ; then \ + ln -fT $$initrd $(BUILDDIR)/netboot/initrd-$$user.img ; \ + else \ + echo "(none)" > $(BUILDDIR)/netboot/initrd-$$user.img ; \ + fi ; \ + \ + if [ "$$DEPLOY_LOCAL" = 1 ] ; then \ + echo "CP to locally as $$user" ; \ + cp $(BUILDDIR)/netboot/vmlinuz-$$user $(BUILDDIR)/netboot/initrd-$$user.img ${NETBOOT_LOCAL} ; \ + else \ + echo "SCP to ${NETBOOT_HOST} as $$user" ; \ + scp $(BUILDDIR)/netboot/vmlinuz-$$user $(BUILDDIR)/netboot/initrd-$$user.img ${NETBOOT_HOST}:${NETBOOT_LOCAL} ; \ + fi + +# Documentation +help:: + @$(echo) "Netboot\n" \ + " \e[3mnetboot\e[0m Copies $(PROJECT) to the network share, allowing the test 
infrastructure\n" \ + " to boot your system" + diff --git a/types.h b/types.h new file mode 100644 index 0000000..72db63d --- /dev/null +++ b/types.h @@ -0,0 +1,67 @@ +/*! \file + * \brief Definition of standard integer types with specified widths and their + * limits + */ + +#pragma once + +// Standard Integer Types +using uint8_t = unsigned char; +using uint16_t = unsigned short; +using uint32_t = unsigned int; +using uint64_t = unsigned long long; +using uintptr_t = unsigned long long; + +using size_t = __SIZE_TYPE__; + +using int8_t = char; +using int16_t = short; +using int32_t = int; +using int64_t = long long; +using intptr_t = long long; + +using ssize_t = long int; + +using ptrdiff_t = __PTRDIFF_TYPE__; + +// Validate typedef size +static_assert(sizeof(int8_t) == (1), "Wrong size for 'int8_t'"); +static_assert(sizeof(int16_t) == (2), "Wrong size for 'int16_t'"); +static_assert(sizeof(int32_t) == (4), "Wrong size for 'int32_t'"); +static_assert(sizeof(int64_t) == (8), "Wrong size for 'int64_t'"); +static_assert(sizeof(intptr_t) == sizeof(void*), "Wrong size for 'intptr_t'"); +static_assert(sizeof(uint8_t) == (1), "Wrong size for 'uint8_t'"); +static_assert(sizeof(uint16_t) == (2), "Wrong size for 'uint16_t'"); +static_assert(sizeof(uint32_t) == (4), "Wrong size for 'uint32_t'"); +static_assert(sizeof(uint64_t) == (8), "Wrong size for 'uint64_t'"); +static_assert(sizeof(uintptr_t) == sizeof(void*), "Wrong size for 'uintptr_t'"); + +// Limits +constexpr int8_t INT8_MIN = (-__INT8_MAX__ - 1); +constexpr int8_t INT8_MAX = (__INT8_MAX__); +constexpr int16_t INT16_MIN = (-__INT16_MAX__ - 1); +constexpr int16_t INT16_MAX = (__INT16_MAX__); +constexpr int32_t INT32_MIN = (-__INT32_MAX__ - 1); +constexpr int32_t INT32_MAX = (__INT32_MAX__); +constexpr int64_t INT64_MIN = (-__INT64_MAX__ - 1); +constexpr int64_t INT64_MAX = (__INT64_MAX__); +constexpr intptr_t INTPTR_MIN = (-__INTPTR_MAX__ - 1); +constexpr intptr_t INTPTR_MAX = (__INTPTR_MAX__); + +constexpr uint8_t 
UINT8_MAX = (__UINT8_MAX__); +constexpr uint16_t UINT16_MAX = (__UINT16_MAX__); +constexpr uint32_t UINT32_MAX = (__UINT32_MAX__); +constexpr uint64_t UINT64_MAX = (__UINT64_MAX__); +constexpr uintptr_t UINTPTR_MAX = (__UINTPTR_MAX__); + +constexpr ptrdiff_t PTRDIFF_MIN = (-__PTRDIFF_MAX__ - 1); +constexpr ptrdiff_t PTRDIFF_MAX = (__PTRDIFF_MAX__); + +constexpr size_t SIZE_MAX = (__SIZE_MAX__); +constexpr ssize_t SSIZE_MIN = (-__INT32_MAX__ - 1); +constexpr ssize_t SSIZE_MAX = (__INT32_MAX__); + +/// The name of our OS +constexpr char* OS_NAME = + "MP" + "StuBS"; diff --git a/user/app1/appl.cc b/user/app1/appl.cc new file mode 100644 index 0000000..25f97b1 --- /dev/null +++ b/user/app1/appl.cc @@ -0,0 +1,6 @@ +// vim: set noet ts=4 sw=4: + +#include "appl.h" + +void Application::action() { // NOLINT +} diff --git a/user/app1/appl.h b/user/app1/appl.h new file mode 100644 index 0000000..e39456d --- /dev/null +++ b/user/app1/appl.h @@ -0,0 +1,33 @@ +// vim: set noet ts=4 sw=4: + +/*! \file + * \brief Enthält die Klasse Application + */ + +#pragma once +#include "../../types.h" + +//! \brief Test application +//! +//! \todo(12) Create a test application +//! \todo(14) Application should inherit from \ref Thread +//! \todo(16) Make some noise using the \ref PIT::pcspeaker "PC Speaker" +class Application { + // Prevent copies and assignments + Application(const Application&) = delete; + Application& operator=(const Application&) = delete; + + public: + Application(Application&&) = default; // XXX: is this used anywhere? + + /*! \brief Constructor + * + * \todo(14) Implement Constructor + */ + + /*! \brief Contains the application code. 
+ * + * \todo(14) Implement Method + */ + void action(); +}; diff --git a/user/app2/kappl.cc b/user/app2/kappl.cc new file mode 100644 index 0000000..15f58ec --- /dev/null +++ b/user/app2/kappl.cc @@ -0,0 +1,6 @@ +// vim: set noet ts=4 sw=4: + +#include "./kappl.h" + +void KeyboardApplication::action() { // NOLINT +} diff --git a/user/app2/kappl.h b/user/app2/kappl.h new file mode 100644 index 0000000..7621757 --- /dev/null +++ b/user/app2/kappl.h @@ -0,0 +1,29 @@ +// vim: set noet ts=4 sw=4: + +/*! \file + * \brief Enthält die Klasse KeyboardApplication + */ + +#pragma once +#include "../../types.h" + +/*! \brief Keyboard Application + * + * \todo(16) Use the keyboard semaphore and buffer in \ref Vault + */ +class KeyboardApplication { + // Prevent copies and assignments + KeyboardApplication(const KeyboardApplication&) = delete; + KeyboardApplication& operator=(const KeyboardApplication&) = delete; + + public: + /*! \brief Constructor + * + * \todo(14) Implement Constructor + */ + + /*! \brief Contains the application code. 
+ * + */ + void action(); +}; diff --git a/utils/flake.lock b/utils/flake.lock new file mode 100644 index 0000000..735cf22 --- /dev/null +++ b/utils/flake.lock @@ -0,0 +1,43 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1740560979, + "narHash": "sha256-Vr3Qi346M+8CjedtbyUevIGDZW8LcA1fTG0ugPY/Hic=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "5135c59491985879812717f4c9fea69604e7f26f", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs", + "systems": "systems" + } + }, + "systems": { + "locked": { + "lastModified": 1680978846, + "narHash": "sha256-Gtqg8b/v49BFDpDetjclCYXm8mAnTrUzR0JnE2nv5aw=", + "owner": "nix-systems", + "repo": "x86_64-linux", + "rev": "2ecfcac5e15790ba6ce360ceccddb15ad16d08a8", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "x86_64-linux", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/utils/flake.nix b/utils/flake.nix new file mode 100644 index 0000000..510879b --- /dev/null +++ b/utils/flake.nix @@ -0,0 +1,58 @@ +/* + * This file allows you to run `nix develop` to get a shell with all external dependencies (`make` plus the tools it calls) of this repo (and maybe some beyond). + * The only thing required is an installation of the "Nix" package manager for Linux (/WSL) or MacOS: https://nixos.org/download/ +*/ +{ + description = '' + The educational operating system StuBS. + ''; + inputs = { + + # The rolling-release of the Nix(OS) package definitions. 
Use `nix flake lock` to update + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + + # Some boilerplate: + systems.url = "github:nix-systems/x86_64-linux"; # no cross compiling and only x86(_64) for now (darwin (MacOS) might work) + }; + outputs = + inputs: + let + lib = inputs.nixpkgs.lib; + eachSystem = + f: + lib.foldAttrs lib.mergeAttrs { } ( + map (s: lib.mapAttrs (_: v: { ${s} = v; }) (f s)) (import inputs.systems) + ); + in + { } + // (eachSystem ( + localSystem: + let + pkgs = import inputs.nixpkgs { system = localSystem; }; + in + { + + # The shell environment definition used by `nix develop`: + devShells.default = pkgs.mkShell.override { stdenv = pkgs.clangStdenv; } { + nativeBuildInputs = + with pkgs; + [ + # for all tasks and maintenance + gdb + qemu_kvm + nasm + ccache + util-linux # mkfs.minix + git + python3 + bear # make compile_commands.json + clang-tools # for clangd and clang-format + ] + ++ (lib.filter ( + pkg: lib.isDerivation pkg && pkg.pname or "" != "glibc" + ) pkgs.stdenv.allowedRequisites); # basic tools + compilers (gcc/g++) + }; + + } + )); +} diff --git a/utils/math.h b/utils/math.h new file mode 100644 index 0000000..6f8a716 --- /dev/null +++ b/utils/math.h @@ -0,0 +1,27 @@ +// vim: set noet ts=4 sw=4: + +/*! \file + * \brief General purpose \ref Math "math functions" + */ + +#pragma once +#include "../types.h" + +/*! \brief Basic math helper functions + */ +namespace Math { +template +T abs(T a) { + return (a >= 0 ? a : -a); +} + +template +T min(T a, T b) { + return a > b ? b : a; +} + +template +T max(T a, T b) { + return a > b ? a : b; +} +} // namespace Math diff --git a/utils/size.h b/utils/size.h new file mode 100644 index 0000000..ee383fc --- /dev/null +++ b/utils/size.h @@ -0,0 +1,17 @@ +/*! 
\file + * \brief Template function to determine the length of an array + */ + +#pragma once + +#include "../types.h" + +/* \brief Helper to retrieve the number of elements in an array + * (Warning: template magic) + * \param Array + * \return Number of elements + */ +template +constexpr size_t size(T (& /*unused*/)[N]) { + return N; +} diff --git a/utils/string.cc b/utils/string.cc new file mode 100644 index 0000000..b8cd159 --- /dev/null +++ b/utils/string.cc @@ -0,0 +1,117 @@ +#include "string.h" + +extern "C" char *strchrnul(const char *s, int c) { + if (s != nullptr) { + while (*s != '\0') { + if (*s == c) { + break; + } + s++; + } + } + return const_cast(s); +} + +extern "C" char *strchr(const char *s, int c) { + if (s != nullptr) { + s = strchrnul(s, c); + if (*s == c) { + return const_cast(s); + } + } + return nullptr; +} + +extern "C" int strcmp(const char *s1, const char *s2) { + if (s1 == nullptr || s2 == nullptr) { + return 0; + } + + while (*s1 == *s2++) { + if (*s1++ == '\0') { + return 0; + } + } + return static_cast(*s1) - static_cast(*(s2 - 1)); +} + +extern "C" int strncmp(const char *s1, const char *s2, size_t n) { + if (s1 != nullptr && s2 != nullptr) { + for (size_t i = 0; i < n; i++) { + if (s1[i] != s2[i]) { + return static_cast(s1[i]) - static_cast(s2[i]); + } else if (s1[i] == '\0') { + break; + } + } + } + return 0; +} + +extern "C" size_t strlen(const char *s) { + size_t len = 0; + if (s != nullptr) { + while (*s++ != '\0') { + len++; + } + } + + return len; +} + +extern "C" char *strcpy(char *dest, const char *src) { // NOLINT + char *r = dest; + if (dest != nullptr && src != nullptr) { + while ((*dest++ = *src++) != '\0') { + } + } + return r; +} + +extern "C" char *strncpy(char *dest, const char *src, size_t n) { + char *r = dest; + if (dest != nullptr && src != nullptr) { + while ((n--) != 0 && (*dest++ = *src++) != '\0') { + } + } + return r; +} + +extern "C" void *memcpy(void *__restrict__ dest, void const *__restrict__ src, + size_t 
size) { + uint8_t *destination = reinterpret_cast(dest); + uint8_t const *source = (uint8_t const *)src; + + for (size_t i = 0; i != size; ++i) { + destination[i] = source[i]; + } + + return dest; +} + +extern "C" void *memmove(void *dest, void const *src, size_t size) { + uint8_t *destination = reinterpret_cast(dest); + uint8_t const *source = reinterpret_cast(src); + + if (source > destination) { + for (size_t i = 0; i != size; ++i) { + destination[i] = source[i]; + } + } else { + for (size_t i = size; i != 0; --i) { + destination[i - 1] = source[i - 1]; + } + } + + return dest; +} + +extern "C" void *memset(void *dest, int pattern, size_t size) { + uint8_t *destination = reinterpret_cast(dest); + + for (size_t i = 0; i != size; ++i) { + destination[i] = static_cast(pattern); + } + + return dest; +} diff --git a/utils/string.h b/utils/string.h new file mode 100644 index 0000000..ad903d0 --- /dev/null +++ b/utils/string.h @@ -0,0 +1,109 @@ +/*! \file + * \brief General purpose \ref string "String functions" + */ +#pragma once + +#include "../types.h" + +/*! \defgroup string String function + * \brief String functions as provided by `%string.h` in the C standard + *library + */ + +/*! \brief Find the first occurrence of a character in a string + * \ingroup string + * \param s string to + * \param c character to find + * \return Pointer to first occurrence of the character + * or to null byte at the end of the string if not found + */ +extern "C" char *strchrnul(const char *s, int c); + +/*! \brief Find the first occurrence of a character in a string + * \ingroup string + * \param s string to + * \param c character to find + * \return Pointer to first occurrence of the character + * or to nullptr if not found + */ +extern "C" char *strchr(const char *s, int c); + +/*! 
\brief Compare two strings
+ * \ingroup string
+ * \param s1 first string
+ * \param s2 second string
+ * \return an integer less than, equal to, or greater than zero if first string
+ * is found, respectively, to be less than, to match, or be greater than second
+ * string
+ */
+extern "C" int strcmp(const char *s1, const char *s2);
+
+/*! \brief Compare two strings up to a maximum length
+ *  \ingroup string
+ *  \param s1 first string
+ *  \param s2 second string
+ *  \param n maximum number of bytes to compare
+ *  \return an integer less than, equal to, or greater than zero if the first
+ *  `n` bytes of the first string are found, respectively, to be less than,
+ *  to match, or be greater than those of the second string
+ */
+extern "C" int strncmp(const char *s1, const char *s2, size_t n);
+
+/*! \brief Calculate the length of a string
+ *  \ingroup string
+ *  \param s pointer to a string
+ *  \return number of bytes in the string, excluding the terminating null byte
+ */
+extern "C" size_t strlen(const char *s);
+
+/*! \brief Copy the contents of a string
+ *         including the terminating null byte (`\0`)
+ *  \ingroup string
+ *  \param dest destination string buffer
+ *  \param src source string buffer
+ *  \return a pointer to the destination string buffer
+ *  \note Beware of buffer overruns: the destination size is never checked!
+ */
+extern "C" char *strcpy(char *dest, const char *src); // NOLINT
+
+/*! \brief Copy the contents of a string up to a maximum length
+ *         or the terminating null byte (`\0`), whichever comes first.
+ *  \ingroup string
+ *  \param dest destination string buffer
+ *  \param src source string buffer
+ *  \param n maximum number of bytes to copy
+ *  \return a pointer to the destination string buffer
+ *  \note If there is no null byte (`\0`) among the first `n` bytes, the
+ *        destination will not be null-terminated! The remainder is not zero-padded.
+ */
+extern "C" char *strncpy(char *dest, const char *src, size_t n);
+
+/*! 
\brief Copy a memory area
+ * \ingroup string
+ * \param dest destination buffer
+ * \param src source buffer
+ * \param size number of bytes to copy
+ * \return pointer to destination
+ * \note The memory must not overlap!
+ */
+extern "C" void *memcpy(void *__restrict__ dest, void const *__restrict__ src,
+						size_t size);
+
+/*! \brief Copy a memory area
+ *         where the source may overlap with the destination
+ *  \ingroup string
+ *  \param dest destination buffer
+ *  \param src source buffer
+ *  \param size number of bytes to copy
+ *  \return pointer to destination
+ */
+extern "C" void *memmove(void *dest, void const *src, size_t size);
+
+/*! \brief Fill a memory area with a single byte value
+ *  \ingroup string
+ *  \param dest destination buffer
+ *  \param pattern byte value to write (stored as an 8-bit value)
+ *  \param size number of bytes to fill with pattern
+ *  \return pointer to destination
+ */
+extern "C" void *memset(void *dest, int pattern, size_t size);