Added note and refactor code #16

Merged
PeterSurda merged 6 commits from swapnil/idlers-agent:main into main 2024-06-26 01:47:20 +02:00

View File

@ -3,6 +3,8 @@ import urllib.request
import logging
import json
import http.client
import subprocess
import re
NON_UPDATABLE_KEYS = [
'server_type',
@ -25,10 +27,10 @@ class ServerData:
def __init__(self):
self.hostname = os.uname().nodename
self.public_ip = self.get_public_ip()
self.dmidecode_data = parse_dmidecode_output()
self.dmidecode_data = self.parse_dmidecode_output()
logging.basicConfig(level=logging.INFO)
def parse_dmidecode_output():
def parse_dmidecode_output(self):
'''
Example dmidecode output:
@ -161,6 +163,40 @@ class ServerData:
logging.info("Post data created")
return post_data
def create_note_data(self):
chassis_info = None
for section in self.dmidecode_data:
if section['DMIType'] == 1:
chassis_info = section
break
if chassis_info:
Review

We should also report "baseboard" and "system", if present. Traditional servers seem to report "system" and/or "chassis", whereas custom-built machines report only "baseboard" (i.e. motherboard), as there is no way for the motherboard to find out what kind of chassis it is mounted into.

We should also report "baseboard" and "system", if present. Traditional servers seem to report "system" and/or "chassis", whereas custom-built machines report only "baseboard" (i.e. motherboard), as there is no way for the motherboard to find out what kind of chassis it is mounted into.
Review

Please share a sample sudo dmidecode -t1 output.
Here's what I see on test2:

root@test2:~# sudo dmidecode -t1
# dmidecode 3.3
Getting SMBIOS data from sysfs.
SMBIOS 2.7 present.

Handle 0x0001, DMI type 1, 27 bytes
System Information
	Manufacturer: System manufacturer
	Product Name: System Product Name
	Version: System Version
	Serial Number: System Serial Number
	UUID: 2aa1f520-d7da-11dd-b8b7-08606ee5b794
	Wake-up Type: Power Switch
	SKU Number: SKU
	Family: To be filled by O.E.M.
Please share a sample `sudo dmidecode -t1` output. Here's what I see on test2: ```bash root@test2:~# sudo dmidecode -t1 # dmidecode 3.3 Getting SMBIOS data from sysfs. SMBIOS 2.7 present. Handle 0x0001, DMI type 1, 27 bytes System Information Manufacturer: System manufacturer Product Name: System Product Name Version: System Version Serial Number: System Serial Number UUID: 2aa1f520-d7da-11dd-b8b7-08606ee5b794 Wake-up Type: Power Switch SKU Number: SKU Family: To be filled by O.E.M. ```
Review

one:

Handle 0x0001, DMI type 1, 27 bytes
System Information
	Manufacturer: To Be Filled By O.E.M.
	Product Name: X570D4U
	Version: To Be Filled By O.E.M.
	Serial Number: To Be Filled By O.E.M.
	UUID: 7533e379-96c4-49d9-f452-a8a159c72190
	Wake-up Type: Power Switch
	SKU Number: To Be Filled By O.E.M.
	Family: To Be Filled By O.E.M.

two:

Handle 0x0100, DMI type 1, 27 bytes
System Information
	Manufacturer: Dell Inc.
	Product Name: PowerEdge R630
	Version: Not Specified
	Serial Number: 1W9PPM2
	UUID: 4c4c4544-0057-3910-8050-b1c04f504d32
	Wake-up Type: Power Switch
	SKU Number: SKU=NotProvided;ModelName=PowerEdge R630
	Family: Not Specified
one: ``` Handle 0x0001, DMI type 1, 27 bytes System Information Manufacturer: To Be Filled By O.E.M. Product Name: X570D4U Version: To Be Filled By O.E.M. Serial Number: To Be Filled By O.E.M. UUID: 7533e379-96c4-49d9-f452-a8a159c72190 Wake-up Type: Power Switch SKU Number: To Be Filled By O.E.M. Family: To Be Filled By O.E.M. ``` two: ``` Handle 0x0100, DMI type 1, 27 bytes System Information Manufacturer: Dell Inc. Product Name: PowerEdge R630 Version: Not Specified Serial Number: 1W9PPM2 UUID: 4c4c4544-0057-3910-8050-b1c04f504d32 Wake-up Type: Power Switch SKU Number: SKU=NotProvided;ModelName=PowerEdge R630 Family: Not Specified ```
chassis_model = chassis_info.get('Product Name', 'Unknown')
chassis_serial = chassis_info.get('Serial Number', 'Unknown')
else:
chassis_model = chassis_serial = 'Unknown'
processor_info = [section for section in self.dmidecode_data if section['DMIType'] == 4]
processor_model = processor_info[0].get('Version', 'Unknown') if processor_info else 'Unknown'
processor_count = len(processor_info)
PeterSurda marked this conversation as resolved
Review

I need to verify how it works if there are multiple processors. I have a couple of dual-socket systems, but they always have the same processor model.

I need to verify how it works if there are multiple processors. I have a couple of dual-socket systems, but they always have the same processor model.
Review

Even this part of the code assumes multiple sections with 'DMIType' 4. But only gets 'Version' from the first section. Since you've said "they always have the same processor model", I assume this code should be fine.

Even this part of the code assumes multiple sections with `'DMIType'` 4. But only gets `'Version'` from the first section. Since you've said "they always have the same processor model", I assume this code should be fine.
Review

Yes looks like it's ok.

Yes looks like it's ok.
ram_info = [section for section in self.dmidecode_data if section['DMIType'] == 17]
ram_details = []
for ram in ram_info:
size = ram.get('Size', 'Unknown')
speed = ram.get('Speed', 'Unknown')
ecc = 'Yes' if ram.get('Total Width') == '72 bits' and ram.get('Data Width') == '64 bits' else 'No'
Review

I was looking at some systems, and sometimes both TotalWidth and DataWidth are 72. I'm not really sure what it means. I vaguely remember reading that it's a bug in some DDR4 modules or motherboards. For now I would only check TotalWidth and ignore DataWidth.

I was looking at some systems, and sometimes both TotalWidth and DataWidth are 72. I'm not really sure what it means. I vaguely remember reading that it's a bug in some DDR4 modules or motherboards. For now I would only check TotalWidth and ignore DataWidth.
Review

I did some reading on this. TotalWidth is DataWidth + (any extra bits for error correction). So it doesn't matter what the exact number of the TotalWidth is, at least in theory. However, common configuration for ECC memory is to have TotalWidth of 72 bits and a DataWidth of 64, giving extra 8 bits for error checking.

TLDR; to determine if a memory module is ECC or non-ECC, we should compare TotalWidth and DataWidth. if TotalWidth > DataWidth: true else false.

I did some reading on this. TotalWidth is DataWidth + (any extra bits for error correction). So it doesn't matter what the exact number of the TotalWidth is, at least in theory. However, common configuration for ECC memory is to have TotalWidth of 72 bits and a DataWidth of 64, giving extra 8 bits for error checking. TLDR; to determine if a memory module is ECC or non-ECC, we should compare TotalWidth and DataWidth. `if TotalWidth > DataWidth: true else false`.
Review

It misreports on some DDR4 systems and I haven't found conclusive clarification. I have one system for example which reports TotalWidth 72 and DataWidth 72. But it doesn't influence the amount of available memory.

It misreports on some DDR4 systems and I haven't found conclusive clarification. I have one system for example which reports TotalWidth 72 and DataWidth 72. But it doesn't influence the amount of available memory.
serial_number = ram.get('Serial Number', 'Unknown')
ram_type = ram.get('Type', 'Unknown')
ram_details.append("Size: {}, Speed: {}, ECC: {}, Serial Number: {}, Type: {}".format(size, speed, ecc, serial_number, ram_type))
Review

There are actually two speeds reported, one is the specification of the module and the other is configured speed. What you could do, is to report them both separated with "@". E.g. "1866@1333MHz".

There are actually two speeds reported, one is the specification of the module and the other is configured speed. What you could do, is to report them both separated with "@". E.g. "1866@1333MHz".
Review

Example output:

root@test2:~# sudo dmidecode -t17
# dmidecode 3.3
Getting SMBIOS data from sysfs.
SMBIOS 2.7 present.

Handle 0x005D, DMI type 17, 34 bytes
Memory Device
	Array Handle: 0x005E
	Error Information Handle: 0x0062
	Total Width: 64 bits
	Data Width: 64 bits
	Size: 8 GB
	Form Factor: DIMM
	Set: None
	Locator: ChannelA-DIMM0
	Bank Locator: BANK 0
	Type: DDR3
	Type Detail: Synchronous
	Speed: 1600 MT/s
	Manufacturer: Micron
	Serial Number: 1FC8D19A
	Asset Tag: 9876543210
	Part Number: 16KTF1G64AZ-1G9P1
	Rank: 2
	Configured Memory Speed: 1600 MT/s

So "Speed" will be "1600 MT/s @ 1600 MT/s" ?

Example output: ```bash root@test2:~# sudo dmidecode -t17 # dmidecode 3.3 Getting SMBIOS data from sysfs. SMBIOS 2.7 present. Handle 0x005D, DMI type 17, 34 bytes Memory Device Array Handle: 0x005E Error Information Handle: 0x0062 Total Width: 64 bits Data Width: 64 bits Size: 8 GB Form Factor: DIMM Set: None Locator: ChannelA-DIMM0 Bank Locator: BANK 0 Type: DDR3 Type Detail: Synchronous Speed: 1600 MT/s Manufacturer: Micron Serial Number: 1FC8D19A Asset Tag: 9876543210 Part Number: 16KTF1G64AZ-1G9P1 Rank: 2 Configured Memory Speed: 1600 MT/s ``` So "Speed" will be "1600 MT/s @ 1600 MT/s" ?
Review

Yes.

Yes.
note = "Chassis Model: {} | Serial Number: {} ||| Processor Model: {} | Count: {} ||| RAM Details: {}".format(
chassis_model, chassis_serial, processor_model, processor_count, ' | '.join(ram_details))
note_data = {
'note': note,
}
return note_data
class ServerManager:
def __init__(self, host, api_key):
@ -219,6 +255,36 @@ class ServerManager:
return server['id']
return None
def upsert_server(self, post_data):
    '''
    Update the server if it already exists, otherwise create it, and
    return the server id reported by the API response.

    The existing id is looked up with self.existing_server_id(post_data).
    A truthy id routes to self.update_server(post_data, server_id);
    anything else routes to self.create_server(post_data). In both cases
    the returned id is the 'server_id' field of the JSON response.

    :param post_data: server payload, passed unchanged to the lookup and
        to the create/update call.
    :return: the 'server_id' value found in the response.
    :raises ValueError: if the response is not a JSON object or does not
        contain a 'server_id'.
    '''
    server_id = self.existing_server_id(post_data)
    if server_id:
        logging.info('Server already exists with id: {}, Updating...'.format(server_id))
        response = self.update_server(post_data, server_id)
    else:
        logging.info('Server does not exist, Creating...')
        response = self.create_server(post_data)

    # Extract the server_id from the response.
    # json.loads raises TypeError for non-string input (e.g. None) and
    # JSONDecodeError (a ValueError subclass) for malformed text; both are
    # reported the same way as a missing 'server_id'.
    try:
        payload = json.loads(response)
    except (TypeError, ValueError) as err:
        logging.error('Failed to get server_id from response: {}'.format(response))
        raise ValueError('Failed to get server_id from response') from err

    # Valid JSON that is not an object (list, string, number) has no .get().
    server_id = payload.get('server_id', None) if isinstance(payload, dict) else None
    if server_id is None:
        logging.error('Failed to get server_id from response: {}'.format(response))
        raise ValueError('Failed to get server_id from response')

    return server_id
def upsert_note(self, note_data, server_id):
    '''
    Update the note attached to a server if one exists, otherwise create it.

    The payload sent to the API is note_data plus a 'service_id' key set to
    server_id. The payload is built on a copy, so the caller's dict is left
    untouched.

    :param note_data: mapping with the note fields (e.g. 'note').
    :param server_id: id of the server the note belongs to; also used to
        look up the existing note via self.get_note(server_id).
    :return: whatever self.update_note or self.create_note returns.
    '''
    # Copy first: adding 'service_id' must not leak into the caller's dict.
    payload = dict(note_data)
    payload['service_id'] = server_id

    try:
        note = self.get_note(server_id)
    except urllib.error.HTTPError:
        # Any HTTP error from the lookup is treated as "no note yet".
        note = None

    if note:
        return self.update_note(payload, server_id)
    return self.create_note(payload)
def validate_env_vars():
api_key = os.getenv('AGENT_API')
host = os.getenv('HOST')
@ -238,29 +304,11 @@ def main():
server_manager = ServerManager(host, api_key)
# Check if the server already exists
server_id = server_manager.existing_server_id(post_data)
server_id = server_manager.upsert_server(post_data)
logging.info('Server id: {}'.format(server_id))
# If the server exists, update it
if server_id:
logging.info('Server already exists with id: {}, Updating...'.format(server_id))
logging.info(server_manager.update_server(post_data, server_id))
else:
logging.info('Server does not exist, Creating...')
logging.info(server_manager.create_server(post_data))
note_data = {
'service_id': server_id,
'note': 'Bla bla bla'
}
try:
note = server_manager.get_note(server_id)
except urllib.error.HTTPError:
note = None
if note:
server_manager.update_note(note_data, server_id)
else:
server_manager.create_note(note_data)
note_data = server_data.create_note_data()
server_manager.upsert_note(note_data, server_id)
if __name__ == '__main__':
main()