xv6 Lab1: Xv6 and Unix utilities

Lab: https://pdos.csail.mit.edu/6.828/2022/labs/util.html

实现一些基础的用户程序，帮助熟悉 xv6 操作系统以及它的系统接口，系统有如下接口:

C 程序中的命令行参数: argc 表示命令行参数数量, char* argv[] 用于接收命令行参数, 通常 argv[0] 表示程序名称, argv[1]以及往后表示命令行参数, 例如运行 ./sleep.c 10 , argc 就等于 2, argv[0] = ./slepp.c , argv[1] = 10.

sleep(easy)

Implement the UNIX program sleep for xv6; your sleep should pause for a user-specified number of ticks. A tick is a notion of time defined by the xv6 kernel, namely the time between two interrupts from the timer chip. Your solution should be in the file user/sleep.c.

首先查看 user/echo.c 如何实现，该命令用于将字符串输出，在 xv6 中实现如下

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int main(int argc, char *argv[]) {
  int i;

  for(i = 1; i < argc; i++){
    write(1, argv[i], strlen(argv[i]));
    if(i + 1 < argc){
      write(1, " ", 1);
    } else {
      write(1, "\n", 1);
    }
  }
  exit(0);
}

然后查看 user/grep.c 的实现

// Simple grep.  Only supports ^ . * $ operators.

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

char buf[1024];
int match(char*, char*);

void grep(char *pattern, int fd) {
  int n, m;
  char *p, *q;

  m = 0;
  while((n = read(fd, buf+m, sizeof(buf)-m-1)) > 0){
    m += n;
    buf[m] = '\0';
    p = buf;
    while((q = strchr(p, '\n')) != 0){
      *q = 0;
      if(match(pattern, p)){
        *q = '\n';
        write(1, p, q+1 - p);
      }
      p = q+1;
    }
    if(m > 0){
      m -= p - buf;
      memmove(buf, p, m);
    }
  }
}

int main(int argc, char *argv[]) {
  int fd, i;
  char *pattern;

  if(argc <= 1){
    fprintf(2, "usage: grep pattern [file ...]\n");
    exit(1);
  }
  pattern = argv[1];

  if(argc <= 2){
    grep(pattern, 0);
    exit(0);
  }

  for(i = 2; i < argc; i++){
    if((fd = open(argv[i], 0)) < 0){
      printf("grep: cannot open %s\n", argv[i]);
      exit(1);
    }
    grep(pattern, fd);
    close(fd);
  }
  exit(0);
}

// Regexp matcher from Kernighan & Pike,
// The Practice of Programming, Chapter 9, or
// https://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html

int matchhere(char*, char*);
int matchstar(int, char*, char*);

int match(char *re, char *text) {
  if(re[0] == '^')
    return matchhere(re+1, text);
  do {  // must look at empty string
    if(matchhere(re, text))
      return 1;
  } while(*text++ != '\0');
  return 0;
}

// matchhere: search for re at beginning of text
int matchhere(char *re, char *text) {
  if(re[0] == '\0')
    return 1;
  if(re[1] == '*')
    return matchstar(re[0], re+2, text);
  if(re[0] == '$' && re[1] == '\0')
    return *text == '\0';
  if(*text!='\0' && (re[0]=='.' || re[0]==*text))
    return matchhere(re+1, text+1);
  return 0;
}

// matchstar: search for c*re at beginning of text
int matchstar(int c, char *re, char *text) {
  do{  // a * matches zero or more instances
    if(matchhere(re, text))
      return 1;
  } while(*text!='\0' && (*text++==c || c=='.'));
  return 0;
}

这是一个简单的 grep 的实现，仅支持四个简单的正则表达式运算符: ^，.，*，$。它们的含义如下:

^: 这个符号表示行的开始。如果一个正则表达式以^开头，那么它只会匹配那些以指定模式开始的文本。例如，^abc 会匹配所有以abc开始的行

.: 这个符号表示任意单个字符。例如，a.c 可以匹配 abc，adc，a*c等

*: 这个符号表示前面的元素可以重复任意次（包括零次）。例如，a可以匹配空字符串，a，aa，aaa等。如果它跟在.后面（即`.`），那么它可以匹配任意数量的任意字符。

$: 这个符号表示行的结束。如果一个正则表达式以$结尾，那么它只会匹配那些以指定模式结束的文本。例如，abc$ 会匹配所有以abc结束的行。

在这个程序中:

grep 函数读取一个文件（由文件描述符fd指定），并查找与给定模式pattern匹配的行。它将每一行读入缓冲区buf，然后使用match函数检查这一行是否与模式匹配。如果匹配，它就将这一行写入标准输出。
match 函数检查文本text是否与正则表达式re匹配。如果正则表达式以^开头，它就使用matchhere函数检查文本是否在开头处与剩余的正则表达式匹配。否则，它就在文本的每个位置尝试匹配整个正则表达式。
matchhere 函数检查文本text是否在开头处与正则表达式re匹配。如果正则表达式的第二个字符是*，它就使用matchstar函数处理。否则，它就检查正则表达式的第一个字符是否与文本的第一个字符匹配，如果匹配，就在剩余的正则表达式和剩余的文本上递归调用自己。
matchstar 函数处理带星号的正则表达式。星号表示前面的字符可以出现任意次数（包括零次）。它在文本的每个位置尝试匹配剩余的正则表达式，直到找到一个匹配的位置或者到达文本的末尾。

然后查看 user/rm.c 实现:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int main(int argc, char *argv[]) {
  int i;

  if(argc < 2){
    fprintf(2, "Usage: rm files...\n");
    exit(1);
  }

  for(i = 1; i < argc; i++){
    if(unlink(argv[i]) < 0){
      fprintf(2, "rm: %s failed to delete\n", argv[i]);
      break;
    }
  }

  exit(0);
}

unlink 是一个系统调用，它从文件系统中删除一个文件的链接。如果文件的链接数减少到0，并且没有任何进程打开该文件，那么文件系统就会释放该文件的空间。如果unlink函数返回一个负值，那么就说明删除操作失败。

最后根据上面几个例子可以实现 sleep

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

int main(int argc, char* argv[]) {
    if(argc != 2) {
        fprintf(2, "sleep error\n");
        exit(1);
    }
    int number = atoi(argv[1]);
    sleep(number);
    exit(0);
}

测试成绩: make GRADEFLAGS=sleep grade

pingpong(easy)

Write a program that uses UNIX system calls to ‘’ping-pong’’ a byte between two processes over a pair of pipes, one for each direction. The parent should send a byte to the child; the child should print “: received ping”, where is its process ID, write the byte on the pipe to the parent, and exit; the parent should read the byte from the child, print “: received pong”, and exit. Your solution should be in the file user/pingpong.c.

代码如下:

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

#define INDEX_READ 0
#define INDEX_WRITE 1
#define BUFFER_SIZE 16

int main(int argc, char* argv[]) {
    int fd1[2], fd2[2]; // 父->子，子->父
    pipe(fd1);
    pipe(fd2);
    int pid;
    pid = fork();
    if(pid < 0) {
        fprintf(2, "fork failed\n");
        exit(1);
    } 
    if(pid == 0) {
        // 子进程
        /*
         子进程接收一个来自父进程的字节然后打印 <pid>: received ping ，然后向父进程写一个字节
         所以 fd1 只需要读，fd2 只需要写
        */
        close(fd1[INDEX_WRITE]);
        close(fd2[INDEX_READ]);
        char buf[BUFFER_SIZE];
        if(read(fd1[INDEX_READ], buf, 1) == 1) {
            fprintf(1, "%d: received ping\n", getpid());
        }
        write(fd2[INDEX_WRITE], "a", 1);
        close(fd2[INDEX_WRITE]); // 防止堵塞
        exit(0);
    } else {
        // 父进程
        /*
         父进程向子进程发送一个字节，然后从子进程接收一个字节，然后打印 <pid>: received pong
         所以 fd1 只需要写，fd2 只需要读
        */
        close(fd1[INDEX_READ]);
        close(fd2[INDEX_WRITE]);
        write(fd1[INDEX_WRITE], "a", 1);
        close(fd1[INDEX_WRITE]); // 防止堵塞
        char buf[BUFFER_SIZE];
        if(read(fd2[INDEX_READ], buf, 1) == 1) {
            fprintf(1, "%d: received pong\n", getpid());
        }
    }
    exit(0);
}

primes(moderate)/(hard)

Write a concurrent version of prime sieve using pipes. This idea is due to Doug McIlroy, inventor of Unix pipes. The picture halfway down this page and the surrounding text explain how to do it. Your solution should be in the file user/primes.c.
Your goal is to use pipe and fork to set up the pipeline. The first process feeds the numbers 2 through 35 into the pipeline. For each prime number, you will arrange to create one process that reads from its left neighbor over a pipe and writes to its right neighbor over another pipe. Since xv6 has limited number of file descriptors and processes, the first process can stop at 35.

递归处理，维护左边的进程和下边的进程即可。需要注意管道的打开和关闭。

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"

#define STD_IN 0
#define STD_OUT 1
#define STD_ERR 2
#define INDEX_READ 0
#define INDEX_WRITE 1
#define BUFFER_SIZE 16

void prime(int fd[2]) {
    // 子->孙
    close(fd[INDEX_WRITE]);
    int num;
    if(read(fd[INDEX_READ], &num, sizeof(num)) == 0) {
        close(fd[INDEX_READ]);
        exit(0);
    }
    fprintf(STD_OUT, "prime %d\n", num);
    int p[2];
    pipe(p);
    if(fork() == 0) {
        prime(p);
    } else {
        close(p[INDEX_READ]);
        int now;
        while(read(fd[INDEX_READ], &now, sizeof(now)) > 0) {
            if(now % num != 0) {
                write(p[INDEX_WRITE], &now, sizeof(now));
            }
        }
        close(p[INDEX_WRITE]);
        wait(0);
    }
    close(fd[INDEX_READ]);
    exit(0);
}

int main(int argc, char* argv[]) {
    int fd[2];
    pipe(fd);
    int pid = fork();
    if(pid == 0) {
        // 子进程
        prime(fd);
    } else {
        // 父进程, 父->子，父写子读
        close(fd[INDEX_READ]);
        for(int i = 2; i <= 35; i ++) {
            if(write(fd[INDEX_WRITE], &i, sizeof(i)) != sizeof(i)) {
                fprintf(STD_ERR, "write error(1)\n");
                exit(1);
            }
        }
        close(fd[INDEX_WRITE]);
        wait(0);
    }
    exit(0);
}

find(moderate)

Write a simple version of the UNIX find program: find all the files in a directory tree with a specific name. Your solution should be in the file user/find.c.

可以查看 ls.c 的实现: 刘刘大顺: xv6中对ls程序的实现(源码详细解析) ，然后仿照完成即可。

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"
#include "kernel/fs.h"

#define STD_IN 0
#define STD_OUT 1
#define STD_ERR 2
#define BUFFER_SIZE 16

char* fmtname(char *path) {
    static char buf[DIRSIZ+1];
    char *p;

    // Find first character after last slash.
    for(p=path+strlen(path); p >= path && *p != '/'; p--)
        ;
    p ++;
    return p;
}

void find(char *dir, char* file) {
    char buf[512], *p;
    int fd;
    struct dirent de;
    struct stat st;

    if((fd = open(dir, 0)) < 0){
        fprintf(STD_ERR, "ls: cannot open %s\n", dir);
        return;
    }
    if(fstat(fd, &st) < 0){
        fprintf(STD_ERR, "ls: cannot stat %s\n", dir);
        close(fd);
        return;
    }
    switch(st.type) {
        case T_DEVICE: // 不做处理
        case T_FILE:
            if(strcmp(fmtname(dir), file) == 0) {
                fprintf(STD_OUT, "%s\n", dir);
            }
            break;
        case T_DIR:
            if(strlen(dir) + 1 + DIRSIZ + 1 > sizeof buf) {
                printf("ls: path too long\n");
                break;
            }
            strcpy(buf, dir);
            p = buf + strlen(buf);
            *p++ = '/';
            while(read(fd, &de, sizeof(de)) == sizeof(de)) {
                if(de.inum == 0 || (strcmp(de.name, ".") == 0) || (strcmp(de.name, "..") == 0))
                    continue;
                memmove(p, de.name, DIRSIZ);
                p[DIRSIZ] = 0;
                if(stat(buf, &st) < 0){
                    printf("ls: cannot stat %s\n", buf);
                    continue;
                }
                find(buf, file);
            }
            break;
    }
    close(fd);
}

int main(int argc, char *argv[]) {
    if(argc != 3) {
        fprintf(STD_ERR, "Usage: find <dirName> <fileName>\n");
        exit(-1);
    }
    find(argv[1], argv[2]);
    exit(0);
}

xargs(moderate)

具体实现过程发布在知乎: xv6-Lab Utilities-xargs

#include "kernel/types.h"
#include "kernel/stat.h"
#include "user/user.h"
#include "kernel/param.h"

#define STD_IN 0
#define STD_OUT 1
#define STD_ERR 2
#define BUFFER_SIZE 512

char *ugets(char *buf, int max) {
    char c;
    int i;
    for (i = 0; i + 1 < max;) {
        int cc = read(0, &c, 1);
        if (cc < 1)
            break;
        if (c == ' ' || c == '\n' || c == '\r')
            break;
        buf[i ++] = c; // 不读入 ' ' 和 '\n'
    }
    buf[i] = '\0';
    return buf;
}

int ugetcmd(char *buf, int nbuf) {
    memset(buf, 0, nbuf);
    ugets(buf, nbuf);
    if (buf[0] == 0) // EOF
        return -1;
    return 0;
}

int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(STD_ERR, "Usage: xargs command\n");
        exit(1);
    }
    char *_argv[MAXARG];
    int _argc = argc - 1;
    for(int i = 0; i < _argc; i ++) {
        _argv[i] = argv[i + 1];
    }
    char buf[BUFFER_SIZE];
    while(ugetcmd(buf, sizeof(buf)) != -1) {
        if(fork() == 0) {
            _argv[_argc] = buf;
            _argc ++; // 在父进程中保留所有的输入参数, 可以删掉
            exec(argv[1], _argv);
            fprintf(STD_ERR, "exec %s failed\n", argv[1]); // 执行失败
            exit(0);
        }
        wait(0);
    }
    exit(0);
}