数字标识符压缩算法

华为笔试真题 6月3号 非AI方向 100分题型

题目内容

标准的数字标识符由 128128128 位组成,通常表示为 888444 位小写十六进制数,每组之间用冒号 (:::) 分隔。例如:

  • 2001:0db8:85a3:0000:0000:8a2e:0370:73342001:0db8:85a3:0000:0000:8a2e:0370:73342001:0db8:85a3:0000:0000:8a2e:0370:7334
    为了提高可读性,需要对数字标识符进行压缩。

压缩规则:

  • 规则1:前导零压缩:每组 444 位小写十六进制数,前导的零可以省略 例如:0db8→db8,0370→370,0000→00db8 → db8,0370 → 370,0000 → 00db8db8037037000000
  • 规则2:连续相同数字组压缩: 连续的相同数字组(即所有位都是同一个数字,如 0000、1111、aaaa、ffff0000、1111、aaaa、ffff00001111aaaaffff等)可以用双星号 ∗∗** 表示,但只能使用一次
    • 例如:2001:aaaa:bbbb:cccc:dddd:eeee:ffff:2222→2001:∗∗:bbbb:cccc:dddd:eeee:ffff:22222001:aaaa:bbbb:cccc:dddd:eeee:ffff:2222 → 2001:**:bbbb:cccc:dddd:eeee:ffff:22222001:aaaa:bbbb:cccc:dddd:eeee:ffff:22222001::bbbb:cccc:dddd:eeee:ffff:2222
    • 注意:如果有多个连续相同数字组,只能压缩最长的一组;如果长度相同,压缩左边的那一组
  • 规则3:压缩优先级: 首先应用规则 222(连续相同数字组压缩),然后应用规则 111(前导零压缩)
    • 例如:2001:0011:2222:3333:1111:0000:0000:1111→2001:11:2222:3333:1111:∗∗:11112001:0011:2222:3333:1111:0000:0000:1111 → 2001:11:2222:3333:1111:**:11112001:0011:2222:3333:1111:0000:0000:11112001:11:2222:3333:1111::1111,即先合并后压缩前导零

输入描述

  • 一个字符串 identifieridentifieridentifier,表示数字标识符
  • 输入保证是完整且有效的标准数字标识符(888 组,每组 444 位小写十六进制字符)
  • 字符串长度固定为 393939

输出描述

压缩后的数字标识符字符串

样例1

输入

2001:0db8:85a3:0010:0001:8a2e:0370:7334

输出

2001:db8:85a3:10:1:8a2e:370:7334

说明
只应用规则 111,因为没有相同数字组

样例2

输入

1111:2222:2222:3333:4444:5555:6666:7777

输出

1111:**:3333:4444:5555:6666:7777

说明
压缩最长的连续相同数字组(2222:22222222:22222222:2222)

题解和思路

思路

实现思路:模拟

  1. 预处理每组,使用flag数组记录每组是否为相同数组数字组。
  2. 从前往后遍历找到最长连续相同数字组对应起始位置以及长度(相同长度取左边为准)。
  3. 输出结果,对于非最长连续组区域,去除前导零后输出,连续最长连续区域使用 **进行替换。

C++

#include<bits/stdc++.h>
using namespace std;



// 通用 切割函数 函数 将字符串str根据delimiter进行切割
vector<string> split(const string& str, const string& delimiter) {
    vector<string> result;
    size_t start = 0;
    size_t end = str.find(delimiter);
    while (end != string::npos) {
        result.push_back(str.substr(start, end - start));
        start = end + delimiter.length();
        end = str.find(delimiter, start);
    }
    // 添加最后一个部分
    result.push_back(str.substr(start));
    return result;
}

bool judegeSame(string& str) {
    return str[0] == str[1] && str[1] == str[2] && str[2] == str[3];
}

// 取出前导零
string removeLeadZero(const string& str) {
    int i = 0;
    while (i < str.size() && str[i] == '0') {
        i++;
    }
    return i == str.size() ? "0" : str.substr(i);
}

int main() {
    ios_base::sync_with_stdio(false);
    cin.tie(nullptr);
    string input;
    getline(cin, input);
    vector<string> part = split(input, ":");
    int n = part.size();
    vector<bool> flag(n, false);
    for (int i = 0; i < n; i++) {
        flag[i] = judegeSame(part[i]);
    }
    
    int bsetBegin = -1;
    int bestLen = 0;
    
    int curBegin = -1;
    int len = 0;
    // 寻找最长连续相同组
    for (int i = 0; i < n; i++) {
        if (flag[i]) {
            if (curBegin == -1) {
                curBegin = i;
                len = 1;
            } else {
                if (i > 0 && part[i] == part[i-1]) {
                    len++;
                } else {
                    curBegin = i;
                    len = 1;
                }
            }
        } else {
            curBegin = -1;
            len = 0;
        }
        if (curBegin != -1 && len > bestLen) {
            bsetBegin = curBegin;
            bestLen = len;
        }
    }
    
    // 输出结果
    int index = 0;
    while (index < n) {
        if (index > 0) {
            cout << ":";
        }
        if (index == bsetBegin) {
            cout << "**";
            index += bestLen;
        } else {
            cout << removeLeadZero(part[index]);
            index++;
        }
    }
    return 0;
}

Java

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.*;

public class Main {

    boolean judegeSame(String str) {
        return str.charAt(0) == str.charAt(1)
                && str.charAt(1) == str.charAt(2)
                && str.charAt(2) == str.charAt(3);
    }

    // 取出前导零
    String removeLeadZero(String str) {
        int i = 0;
        while (i < str.length() && str.charAt(i) == '0') {
            i++;
        }
        return i == str.length() ? "0" : str.substring(i);
    }

    void solve() throws Exception {
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String input = br.readLine();

        String[] part = input.split(":", -1);
        int n = part.length;

        boolean[] flag = new boolean[n];
        for (int i = 0; i < n; i++) {
            flag[i] = judegeSame(part[i]);
        }

        int bestBegin = -1;
        int bestLen = 0;

        int curBegin = -1;
        int len = 0;

        // 寻找最长连续相同组
        for (int i = 0; i < n; i++) {
            if (flag[i]) {
                if (curBegin == -1) {
                    curBegin = i;
                    len = 1;
                } else {
                    if (i > 0 && part[i].equals(part[i - 1])) {
                        len++;
                    } else {
                        curBegin = i;
                        len = 1;
                    }
                }
            } else {
                curBegin = -1;
                len = 0;
            }

            if (curBegin != -1 && len > bestLen) {
                bestBegin = curBegin;
                bestLen = len;
            }
        }

        StringBuilder sb = new StringBuilder();
        int index = 0;

        while (index < n) {
            if (index > 0) {
                sb.append(":");
            }

            if (index == bestBegin) {
                sb.append("**");
                index += bestLen;
            } else {
                sb.append(removeLeadZero(part[index]));
                index++;
            }
        }

        System.out.print(sb);
    }

    public static void main(String[] args) throws Exception {
        new Main().solve();
    }
}

python

# 判断四个字符是否全部相同
def judege_same(s):
    return s[0] == s[1] and s[1] == s[2] and s[2] == s[3]


# 取出前导零
def remove_lead_zero(s):
    i = 0
    while i < len(s) and s[i] == '0':
        i += 1
    return "0" if i == len(s) else s[i:]


def main():
    input_str = input()

    part = input_str.split(":")
    n = len(part)

    flag = [False] * n
    for i in range(n):
        flag[i] = judege_same(part[i])

    best_begin = -1
    best_len = 0

    cur_begin = -1
    length = 0

    # 寻找最长连续相同组
    for i in range(n):
        if flag[i]:
            if cur_begin == -1:
                cur_begin = i
                length = 1
            else:
                if i > 0 and part[i] == part[i - 1]:
                    length += 1
                else:
                    cur_begin = i
                    length = 1
        else:
            cur_begin = -1
            length = 0

        if cur_begin != -1 and length > best_len:
            best_begin = cur_begin
            best_len = length

    result = []
    index = 0
    # 输出结果
    while index < n:
        if index == best_begin:
            result.append("**")
            index += best_len
        else:
            result.append(remove_lead_zero(part[index]))
            index += 1

    print(":".join(result))


if __name__ == "__main__":
    main()

Javascript

const readline = require('readline');

const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
});

let input = [];

rl.on('line', line => {
    input.push(line);
});

rl.on('close', () => {
    const str = input[0];

    // 判断四个字符是否全部相同
    function judegeSame(s) {
        return s[0] === s[1]
            && s[1] === s[2]
            && s[2] === s[3];
    }

    // 取出前导零
    function removeLeadZero(s) {
        let i = 0;
        while (i < s.length && s[i] === '0') {
            i++;
        }
        return i === s.length ? "0" : s.substring(i);
    }

    const part = str.split(":");
    const n = part.length;

    const flag = new Array(n).fill(false);

    for (let i = 0; i < n; i++) {
        flag[i] = judegeSame(part[i]);
    }

    let bestBegin = -1;
    let bestLen = 0;

    let curBegin = -1;
    let len = 0;

    // 寻找最长连续相同组
    for (let i = 0; i < n; i++) {
        if (flag[i]) {
            if (curBegin === -1) {
                curBegin = i;
                len = 1;
            } else {
                if (i > 0 && part[i] === part[i - 1]) {
                    len++;
                } else {
                    curBegin = i;
                    len = 1;
                }
            }
        } else {
            curBegin = -1;
            len = 0;
        }

        if (curBegin !== -1 && len > bestLen) {
            bestBegin = curBegin;
            bestLen = len;
        }
    }

    const result = [];
    let index = 0;

    while (index < n) {
        if (index === bestBegin) {
            result.push("**");
            index += bestLen;
        } else {
            result.push(removeLeadZero(part[index]));
            index++;
        }
    }

    console.log(result.join(":"));
});

Go

package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// 判断四个字符是否全部相同
func judegeSame(str string) bool {
	return str[0] == str[1] &&
		str[1] == str[2] &&
		str[2] == str[3]
}

// 取出前导零
func removeLeadZero(str string) string {
	i := 0
	for i < len(str) && str[i] == '0' {
		i++
	}

	if i == len(str) {
		return "0"
	}
	return str[i:]
}

func main() {
	reader := bufio.NewReader(os.Stdin)
	input, _ := reader.ReadString('\n')
	input = strings.TrimSpace(input)

	part := strings.Split(input, ":")
	n := len(part)

	flag := make([]bool, n)
	for i := 0; i < n; i++ {
		flag[i] = judegeSame(part[i])
	}

	bestBegin := -1
	bestLen := 0

	curBegin := -1
	length := 0

	// 寻找最长连续相同组
	for i := 0; i < n; i++ {
		if flag[i] {
			if curBegin == -1 {
				curBegin = i
				length = 1
			} else {
				if i > 0 && part[i] == part[i-1] {
					length++
				} else {
					curBegin = i
					length = 1
				}
			}
		} else {
			curBegin = -1
			length = 0
		}

		if curBegin != -1 && length > bestLen {
			bestBegin = curBegin
			bestLen = length
		}
	}

	var result []string
	index := 0

	for index < n {
		if index == bestBegin {
			result = append(result, "**")
			index += bestLen
		} else {
			result = append(result, removeLeadZero(part[index]))
			index++
		}
	}

	fmt.Println(strings.Join(result, ":"))
}
Logo

AtomGit 是由开放原子开源基金会联合 CSDN 等生态伙伴共同推出的新一代开源与人工智能协作平台。平台坚持“开放、中立、公益”的理念,把代码托管、模型共享、数据集托管、智能体开发体验和算力服务整合在一起,为开发者提供从开发、训练到部署的一站式体验。

更多推荐