URAL 1238. Folding

1. 题目

http://acm.timus.ru/problem.aspx?space=1&num=1238

1238. Folding

Time limit: 1.0 second
Memory limit: 64 MB
Bill is trying to compactly represent sequences of capital alphabetic characters from ‘A’ to ‘Z’ by folding repeating subsequences inside them. For example, one way to represent a sequence AAAAAAAAAABABABCCD is 10(A)2(BA)B2(C)D. He formally defines folded sequences of characters along with the unfolding transformation for them in the following way:
  • A sequence that contains a single character from ‘A’ to ‘Z’ is considered to be a folded sequence. Unfolding of this sequence produces the same sequence of a single character itself.
  • If S and Q are folded sequences, then SQ is also a folded sequence. If S unfolds to S’ and Q unfolds to Q’, then SQ unfolds to S’Q’.
  • If S is a folded sequence, then X(S) is also a folded sequence, where X is a decimal representation of an integer number greater than 1. If S unfolds to S’, then X(S) unfolds to S’ repeated X times.
According to this definition it is easy to unfold any given folded sequence. However, Bill is much more interested in the reverse transformation. He wants to fold the given sequence in such a way that the resulting folded sequence contains the least possible number of characters.

Input

The input contains a single line of characters from ‘A’ to ‘Z’ with at least 1 and at most 100 characters.

Output

Write a single line that contains the shortest possible folded sequence that unfolds to the sequence that is given in the input. If there are many such sequences then write any one of them.

Samples

input → output
AAAAAAAAAABABABCCD → 9(A)3(AB)CCD
NEERCYESYESYESNEERCYESYESYES → 2(NEERC3(YES))
Problem Author: Roman Elizarov
Problem Source: 2002-2003 ACM Northeastern European Regional Programming Contest

2. 思路

给出一种字符串压缩方法,即把重复的字符串压缩为“重复次数(重复子串)”的形式,给出一个字符串,求最短压缩。

记原始字符串为str[0 : L - 1],L为字符串长度。使用seq[i][j]表示对子串str[i : j](闭区间)的最短压缩,进行区间DP,即:

for subSeqLen = 1 : L - 1
    for start = 0 : L - 1 - subSeqLen
        end = start + subSeqLen
        for mid = start : end - 1
            seq[start][end] = min(seq[start][end], combine(seq[start][mid], seq[mid + 1][end]))

3. 代码

字符串的处理比较麻烦。程序中使用了SequenceRecord,str存储字符串内容(不含重复次数和括号),rep表示该字符串的重复次数,len表示字符串长度。

#include <cstdio>
#include <cstring>

// Size used for every character buffer in this file and for both dimensions
// of the DP table. The problem statement caps the input at 100 characters, so
// 110 leaves room for the terminating '\0' plus some slack.
const int MAX_LENGTH = 110;
// Large sentinel value; not referenced anywhere else in the visible code.
const int INF = 0x0fffffff;

/*
 * One candidate encoding of a substring: the unit text `str` taken `rep`
 * times. The repeat count and the surrounding parentheses are NOT stored in
 * `str`; they are only materialised by build().
 */
typedef struct SequenceRecord {
    // NUL-terminated unit text (may itself contain already-folded pieces).
    char str[MAX_LENGTH];
    // Length value supplied by whoever fills the record; every caller in this
    // file passes the number of characters in `str`.
    int len;
    // Number of times `str` is repeated.
    int rep;
} Sequence;


// Solver entry point: reads the input string from stdin and prints a folded form.
void solveE8f_Folding();
// Fills *seq with a copy of str together with the given len and rep.
void setSequence(Sequence *seq, const char *str, int len, int rep);
// Merges the records of two adjacent substrings into *combined.
void combineSequence(Sequence a, Sequence b, Sequence *combined);
// Renders oriSeq (unit text + repeat count) into a plain rep-1 record in *result.
void build(Sequence oriSeq, Sequence *result);
// Manual debugging helper; not called from the active code path in main().
void test();

/*
 * Program entry point: delegates all work to the solver and exits with
 * status 0.
 */
int main() {
    // Local debugging aids, intentionally left disabled:
    //   freopen("test.txt", "r", stdin);
    //   test();

    solveE8f_Folding();

    return 0;
}

void test() {
    char input[MAX_LENGTH], input2[MAX_LENGTH];
    scanf("%s", input);
    scanf("%s", input2);
    Sequence seq, seq2;
    setSequence(&seq, input, strlen(input), 1);
    setSequence(&seq2, input2, strlen(input2), 1);
    Sequence b, c;
    combineSequence(seq, seq2, &c);
    build(c, &b);
    printf("%s, %d, %d\n", b.str, b.len, b.rep);
}

// DP table: seq[i][j] holds the record chosen for the substring input[i..j]
// (both ends inclusive). Only entries with i <= j are ever written.
Sequence seq[MAX_LENGTH][MAX_LENGTH];

/*
 * Reads one whitespace-delimited token from stdin and prints a folded
 * encoding of it, followed by a newline.
 *
 * The table is filled in order of increasing substring length. For each
 * substring every split point is tried; the two halves are merged with
 * combineSequence() and the candidate with the smaller `len` is kept, ties
 * going to the candidate with the larger `rep`. Note that for a record with
 * rep > 1 the `len` field is the length of the repeated unit (that is what
 * combineSequence() stores), not the length of the rendered text.
 *
 * Prints nothing when no token can be read.
 */
void solveE8f_Folding() {
    char input[MAX_LENGTH];

    // The field width 109 is MAX_LENGTH - 1, so an over-long line cannot
    // overflow `input`. On a failed read the buffer would be uninitialised
    // and len == 0 would index seq[0][-1] below, so stop early.
    if (scanf("%109s", input) != 1) {
        return;
    }
    int len = (int)strlen(input);

    // Base case: every single character is its own rep-1 record.
    for (int i = 0; i < len; ++i) {
        char str[2] = { input[i], '\0' };
        setSequence(&seq[i][i], str, 1, 1);
    }

    // subSeqLen is (substring length - 1), i.e. the distance end - start.
    for (int subSeqLen = 1; subSeqLen < len; ++subSeqLen) {
        for (int start = 0; start < len - subSeqLen; ++start) {
            int end = start + subSeqLen;

            // Seed the search with the split right after the first character.
            Sequence combine;
            combineSequence(seq[start][start], seq[start + 1][end], &combine);

            // Try the remaining splits [start..mid] + [mid+1..end].
            for (int mid = start + 1; mid < end; ++mid) {
                Sequence tmp;
                combineSequence(seq[start][mid], seq[mid + 1][end], &tmp);
                if (tmp.len < combine.len || ((tmp.len == combine.len) && (tmp.rep > combine.rep))) {
                    combine = tmp;
                }
            }
            seq[start][end] = combine;
        }
    }

    // The record for the whole string may still carry rep > 1; build() turns
    // it into the final printable text.
    Sequence result;
    build(seq[0][len - 1], &result);
    printf("%s\n", result.str);
}


/*
 * Overwrites *seq with the given unit text, length and repeat count.
 *
 * `str` must be NUL-terminated and must not overlap seq->str. No bounds check
 * is performed: the caller has to make sure the text, including its
 * terminator, fits into the MAX_LENGTH-byte `str` field.
 */
void setSequence(Sequence *seq, const char *str, int len, int rep) {
    seq->rep = rep;
    seq->len = len;
    // Copy the characters together with the terminating '\0'.
    memcpy(seq->str, str, strlen(str) + 1);
}

/*
 * Merges the records of two adjacent substrings into *combined.
 *
 * - When both records carry the same unit text, the result keeps that text
 *   and a.len, and its repeat count is a.rep + b.rep.
 * - Otherwise both records are rendered with build(), the rendered texts are
 *   concatenated (a first, then b), and the result is a rep-1 record whose
 *   len is the sum of the two rendered lengths.
 *
 * The concatenation goes into a MAX_LENGTH-byte buffer without a bounds
 * check.
 */
void combineSequence(Sequence a, Sequence b, Sequence *combined) {
    // Same unit on both sides: just accumulate the repeat count.
    if (strcmp(a.str, b.str) == 0) {
        setSequence(combined, a.str, a.len, a.rep + b.rep);
        return;
    }

    Sequence left, right;
    build(a, &left);
    build(b, &right);

    char joined[MAX_LENGTH];
    strcpy(joined, left.str);
    // Append the right-hand text directly after the left-hand one.
    strcpy(joined + strlen(joined), right.str);

    setSequence(combined, joined, left.len + right.len, 1);
}

/*
 * Renders oriSeq into a rep-1 record stored in *result.
 *
 * Two spellings are compared:
 *   folded   -> "<rep>(<str>)", length = oriSeq.len + digits(rep) + 2
 *   expanded -> <str> written rep times, length = oriSeq.len * oriSeq.rep
 * The folded spelling is used only when it is strictly shorter; on a tie the
 * expanded spelling wins. The chosen length becomes result->len.
 *
 * The text is assembled in a MAX_LENGTH-byte buffer without a bounds check.
 */
void build(Sequence oriSeq, Sequence *result) {
    // sprintf returns the number of characters written, i.e. the digit count.
    char digits[MAX_LENGTH];
    const int digitCount = sprintf(digits, "%d", oriSeq.rep);

    const int foldedLen = oriSeq.len + digitCount + 2;  // +2 for '(' and ')'
    const int expandedLen = oriSeq.len * oriSeq.rep;

    char text[MAX_LENGTH];
    text[0] = '\0';

    if (foldedLen < expandedLen) {
        sprintf(text, "%d(%s)", oriSeq.rep, oriSeq.str);
        setSequence(result, text, foldedLen, 1);
        return;
    }

    // Write the unit rep times, advancing a cursor to the current end.
    char *tail = text;
    for (int copies = oriSeq.rep; copies > 0; --copies) {
        strcpy(tail, oriSeq.str);
        tail += strlen(tail);
    }
    setSequence(result, text, expandedLen, 1);
}