/[osn-commons]/trunk/srcproc/srcstats.c
ViewVC logotype

Contents of /trunk/srcproc/srcstats.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 63 - (show annotations)
Thu Sep 12 14:07:24 2024 UTC (6 months, 2 weeks ago) by rakinar2
File MIME type: text/x-c
File size: 23546 byte(s)
feat: add srcstats program

1 /*
2 * srcstats.c -- Source code statistics generator
3 *
4 * This file is part of OSN Commons.
5 * Copyright (C) 2024 OSN Developers.
6 *
7 * OSN Commons is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * any later version.
11 *
12 * OSN Commons is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with OSN Commons. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <ctype.h>
22 #include <dirent.h>
23 #include <errno.h>
24 #include <getopt.h>
25 #include <pthread.h>
26 #include <stdarg.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <sys/stat.h>
32 #include <sys/types.h>
33
34 #ifdef HAVE_CONFIG_H
35 # include "config.h"
36 #endif
37
38 #define PROG_CANONICAL_NAME "srcstats"
39 #define PROG_AUTHORS "Ar Rakin <[email protected]>"
40
41 /* TODO: Add support for more file types, and
42 output statistics separately for each file type. */
43
44 static const char *prog_name = NULL;
45
/* Long options accepted on the command line.  Every entry takes no
   argument and yields the matching short option letter; the all-zero
   entry terminates the table.  */
static const struct option long_options[] = {
  { "help",    no_argument, NULL, 'h' },
  { "version", no_argument, NULL, 'v' },
  { NULL,      0,           NULL, 0   },
};

/* Short option letters, in the same order as the long options above.  */
static const char *short_options = "hv";
53
struct codebase_report;

/* Context handed to a file handler while a single file is analyzed.
   The four strings are heap-allocated and released together by
   codebase_scan_state_free().  */
struct codebase_scan_state
{
  /* Name of the file being analyzed, without its directory part.  */
  char *filename;

  /* Copy of the directory the enclosing report was started for.  */
  char *directory;

  /* Extension (or shebang program name) that selected the handler;
     may be NULL when the file was selected by its file name.  */
  char *extension;

  /* NOTE(review): only ever initialized to NULL by the code visible in
     this file.  */
  char *shebang_prog;

  /* Report whose counters the handler updates; not owned.  */
  struct codebase_report *report;
};
62
/* Accumulated statistics for one directory tree.  */
struct codebase_report
{
  unsigned long files;         /* Files passed to a handler.  */
  unsigned long ignored;       /* Files no handler claimed.  */
  unsigned long directories;   /* Directories opened successfully.  */
  unsigned long lines;         /* Lines read from analyzed files.  */
  unsigned long blank_lines;   /* Lines holding only whitespace.  */
  unsigned long comment_lines; /* Lines counted as comments.  */
  unsigned long code_lines;    /* Lines counted as code.  */
  char *directory;             /* Heap copy of the scanned root.  */
};
74
/* The analyzers are declared up front because the handler table refers
   to them before their definitions appear.  */
static void codebase_report_analyze_c (struct codebase_scan_state *state,
                                       FILE *file);
static void codebase_report_analyze_sh (struct codebase_scan_state *state,
                                        FILE *file);

/* Associates an analyzer with the files it is responsible for.  Each
   list is either NULL or a NULL-terminated array of strings.  */
struct codebase_file_handler
{
  /* Analyzer invoked with the scan state and the opened file.  */
  void (*handler) (struct codebase_scan_state *state, FILE *file);

  /* File extensions, without the leading dot.  */
  const char **extensions;

  /* Exact file names.  */
  const char **filenames;

  /* Interpreter names looked for in a `#!' line.  */
  const char **shebangs;
};
87
88 /* clang-format off */
89 static struct codebase_file_handler codebase_file_handlers[] = {
90 {
91 .handler = &codebase_report_analyze_c,
92 .extensions = (const char *[]) {
93 "c",
94 "h",
95 "cpp",
96 "hpp",
97 "cc",
98 "hh",
99 "cxx",
100 "hxx",
101 "ts",
102 "js",
103 "java",
104 NULL
105 },
106 .filenames = NULL,
107 .shebangs = NULL
108 },
109 {
110 .handler = &codebase_report_analyze_sh,
111 .extensions = (const char *[]) {
112 "sh",
113 "bash",
114 "conf",
115 "fish",
116 "csh",
117 "zsh",
118 "am",
119 "ac",
120 NULL
121 },
122 .filenames = (const char *[]) {
123 "Makefile",
124 "Dockerfile",
125 NULL
126 },
127 .shebangs = (const char *[]) {
128 "sh",
129 "bash",
130 "fish",
131 "zsh",
132 "csh",
133 NULL
134 }
135 },
136 };
137 /* clang-format on */
138
139 static void
140 report_error (const char *format, ...)
141 {
142 va_list args;
143 va_start (args, format);
144 fprintf (stderr, "%s: ", prog_name);
145 vfprintf (stderr, format, args);
146 fprintf (stderr, ": %s", strerror (errno));
147 va_end (args);
148 fputc ('\n', stderr);
149 }
150
/* realloc() wrapper that never returns NULL: when the allocation fails
   an error is reported and the program exits with EXIT_FAILURE.  */
static void *
xrealloc (void *ptr, size_t size)
{
  void *resized = realloc (ptr, size);

  if (resized != NULL)
    return resized;

  report_error ("xrealloc(): failed to reallocate memory");
  exit (EXIT_FAILURE);
}
164
/* Return a newly allocated string holding P1, a '/' and P2, in that
   order.  A separator is always inserted, even when P1 already ends in
   one.  When LEN is not NULL it receives the length of the result, not
   counting the terminating NUL.  The caller frees the result.  */
static char *
path_join (const char *p1, const char *p2, size_t *len)
{
  const size_t p1_len = strlen (p1);
  const size_t p2_len = strlen (p2);
  const size_t length = p1_len + 1 + p2_len;

  /* One allocation of the final size instead of growing byte by byte.  */
  char *path = xrealloc (NULL, length + 1);

  memcpy (path, p1, p1_len);
  path[p1_len] = '/';

  /* Copying p2_len + 1 bytes brings the terminating NUL along.  */
  memcpy (path + p1_len + 1, p2, p2_len + 1);

  if (len != NULL)
    *len = length;

  return path;
}
201
202 static void
203 codebase_report_analyze_c (struct codebase_scan_state *state, FILE *file)
204 {
205 char *line = NULL;
206 size_t len = 0;
207 ssize_t read;
208 bool in_comment = false;
209 struct codebase_report *report = state->report;
210
211 while ((read = getline (&line, &len, file)) != -1)
212 {
213 bool comment_line_incremented = false;
214 ssize_t i = 0;
215
216 report->lines++;
217
218 if (in_comment)
219 {
220 while (i < read)
221 {
222 if (i + 1 < read
223 && (line[i] == '*' && line[i + 1] == '/'))
224 {
225 in_comment = false;
226 i += 2;
227 break;
228 }
229
230 i++;
231 }
232
233 report->comment_lines++;
234 comment_line_incremented = true;
235
236 if (i == read)
237 continue;
238 }
239
240 while (i < read && isspace (line[i]))
241 i++;
242
243 if (i == read)
244 {
245 report->blank_lines++;
246 continue;
247 }
248
249 if (i + 1 < read
250 && (line[i] == '\'' || line[i] == '"'
251 || ((strcmp (state->extension, "ts") == 0
252 || strcmp (state->extension, "js") == 0)
253 && line[i] == '`')))
254 {
255 char quote = line[i];
256
257 while (i < read)
258 {
259 if (line[i] == '\\')
260 i++;
261
262 if (i < read && line[i] == quote)
263 {
264 quote = 0;
265 break;
266 }
267
268 if (i < read)
269 i++;
270 }
271
272 if (quote == '`')
273 {
274 while ((read = getline (&line, &len, file)) != -1)
275 {
276 report->lines++;
277
278 if (line[0] == quote)
279 break;
280
281 report->code_lines++;
282 }
283
284 continue;
285 }
286
287 if (i == read)
288 {
289 report->code_lines++;
290 continue;
291 }
292 }
293
294 if (i + 1 < read && line[i] == '/' && line[i + 1] == '/')
295 {
296 if (!comment_line_incremented)
297 report->comment_lines++;
298
299 comment_line_incremented = true;
300 continue;
301 }
302
303 if (i + 1 < read && line[i] == '/' && line[i + 1] == '*')
304 {
305 in_comment = true;
306
307 if (!comment_line_incremented)
308 report->comment_lines++;
309
310 comment_line_incremented = true;
311
312 while (i < read)
313 {
314 if (i + 1 < read
315 && (line[i] == '*' && line[i + 1] == '/'))
316 {
317 in_comment = false;
318 i += 2;
319 break;
320 }
321
322 i++;
323 }
324 }
325
326 while (i < read && isspace (line[i]))
327 i++;
328
329 if (!in_comment && i < read)
330 report->code_lines++;
331 }
332
333 free (line);
334 }
335
336 static void
337 codebase_report_analyze_sh (struct codebase_scan_state *state, FILE *file)
338 {
339 char *line = NULL;
340 size_t len = 0;
341 ssize_t read;
342 struct codebase_report *report = state->report;
343
344 while ((read = getline (&line, &len, file)) != -1)
345 {
346 ssize_t i = 0;
347
348 report->lines++;
349
350 while (i < read && isspace (line[i]))
351 i++;
352
353 if (i == read)
354 {
355 report->blank_lines++;
356 continue;
357 }
358
359 if (i + 1 < read && line[i] == '#')
360 {
361 if (i + 2 >= read || line[i + 1] != '!')
362 report->comment_lines++;
363
364 continue;
365 }
366
367 if (i + 2 < read && line[i] == '<' && line[i + 1] == '<')
368 {
369 char *token = NULL;
370 size_t token_len = 0;
371
372 while (i < read && isspace (line[i]))
373 i++;
374
375 while (i < read && !isspace (line[i]))
376 {
377 token = xrealloc (token, ++token_len);
378 token[token_len - 1] = line[i];
379 i++;
380 }
381
382 token = xrealloc (token, ++token_len);
383 token[token_len - 1] = 0;
384
385 while ((read = getline (&line, &len, file)) != -1)
386 {
387 report->lines++;
388
389 if (strcmp (line, token) == 0)
390 break;
391
392 report->code_lines++;
393 }
394
395 free (token);
396 continue;
397 }
398
399 if (i + 1 < read && (line[i] == '\'' || line[i + 1] == '"'))
400 {
401 char quote = line[i];
402
403 while (i < read)
404 {
405 if (line[i] == '\\')
406 i++;
407
408 if (i < read && line[i] == quote)
409 {
410 quote = 0;
411 break;
412 }
413
414 if (i < read)
415 i++;
416 }
417
418 if (quote != 0)
419 {
420 while ((read = getline (&line, &len, file)) != -1)
421 {
422 report->lines++;
423
424 if (line[0] == quote)
425 break;
426
427 report->code_lines++;
428 }
429
430 continue;
431 }
432
433 if (i == read)
434 {
435 report->code_lines++;
436 continue;
437 }
438 }
439
440 while (i < read && isspace (line[i]))
441 i++;
442
443 if (i < read)
444 report->code_lines++;
445 }
446
447 free (line);
448 }
449
450 static void
451 codebase_scan_state_free (struct codebase_scan_state *state)
452 {
453 free (state->directory);
454 free (state->extension);
455 free (state->filename);
456 free (state->shebang_prog);
457 }
458
/* Return the text that follows `#!' on the first line of FILE that is
   long enough to hold one, or NULL when that line does not start with
   `#!' (leading blanks are ignored), when no such line exists, or when
   memory runs out.  Blanks right after `#!' and trailing blanks, carriage
   returns and the newline are not part of the result.  The stream
   position is put back where it was on entry.  The caller frees the
   result.  */
static char *
get_file_shebang (FILE *file)
{
  char *line = NULL, *ret = NULL;
  size_t len = 0;
  ssize_t read;
  const long pos = ftell (file);

  while ((read = getline (&line, &len, file)) != -1)
    {
      ssize_t start = 0;
      ssize_t end = read;

      while (start < read && (line[start] == ' ' || line[start] == '\t'))
        start++;

      /* Too short to hold `#!' plus anything else (a blank line, for
         instance): look at the next line.  */
      if (start + 2 >= read)
        continue;

      if (line[start] == '#' && line[start + 1] == '!')
        {
          /* Copy from behind the `#!' that was actually found, not from
             a fixed offset into the line.  */
          start += 2;

          while (start < end && (line[start] == ' ' || line[start] == '\t'))
            start++;

          /* The last line of a file may lack a newline, so only strip
             what is really there.  */
          while (end > start
                 && (line[end - 1] == '\n' || line[end - 1] == '\r'
                     || line[end - 1] == ' ' || line[end - 1] == '\t'))
            end--;

          ret = strndup (line + start, (size_t) (end - start));
        }

      break;
    }

  /* ftell() reports failure with -1; there is nothing to restore then.  */
  if (pos != -1)
    fseek (file, pos, SEEK_SET);

  free (line);
  return ret;
}
487
488 static void
489 codebase_report_analyze_file (struct codebase_report *report, const char *path,
490 FILE *file)
491 {
492 const char *extension = strrchr (path, '.');
493 const char *filename = strrchr (path, '/');
494
495 extension = extension == NULL ? NULL : extension + 1;
496 filename = filename == NULL ? path : filename + 1;
497
498 for (size_t i = 0; i < sizeof (codebase_file_handlers)
499 / sizeof (codebase_file_handlers[0]);
500 i++)
501 {
502 struct codebase_scan_state state = {
503 .filename = strdup (filename),
504 .directory = strdup (report->directory),
505 .extension = NULL,
506 .shebang_prog = NULL,
507 .report = report,
508 };
509
510 if (codebase_file_handlers[i].extensions != NULL)
511 {
512 for (size_t j = 0;
513 codebase_file_handlers[i].extensions[j] != NULL; j++)
514 {
515 if (extension != NULL
516 && strcmp (
517 extension,
518 codebase_file_handlers[i].extensions[j])
519 == 0)
520 {
521 state.extension = strdup (extension);
522 codebase_file_handlers[i].handler (&state,
523 file);
524 codebase_scan_state_free (&state);
525 report->files++;
526 return;
527 }
528 }
529 }
530
531 if (codebase_file_handlers[i].filenames != NULL)
532 {
533 for (size_t j = 0;
534 codebase_file_handlers[i].filenames[j] != NULL; j++)
535 {
536 if (strcmp (filename,
537 codebase_file_handlers[i].filenames[j])
538 == 0)
539 {
540 codebase_file_handlers[i].handler (&state,
541 file);
542 codebase_scan_state_free (&state);
543 report->files++;
544 return;
545 }
546 }
547 }
548
549 char *shebang = get_file_shebang (file);
550
551 if (shebang == NULL || codebase_file_handlers[i].shebangs == NULL)
552 {
553 free (shebang);
554 free (state.filename);
555 free (state.directory);
556 continue;
557 }
558
559 for (size_t j = 0; codebase_file_handlers[i].shebangs[j] != NULL;
560 j++)
561 {
562 const char *prog = codebase_file_handlers[i].shebangs[j];
563 const char *shebang_ptr = shebang;
564
565 if (strncmp (shebang_ptr, "/usr/bin/", 9) == 0)
566 shebang_ptr += 9;
567 else if (strncmp (shebang_ptr, "/bin/", 5) == 0)
568 shebang_ptr += 5;
569 else if (strncmp (shebang_ptr, "/usr/bin/env ", 14) == 0)
570 {
571 shebang_ptr += 14;
572
573 while (*shebang_ptr == ' ' || *shebang_ptr == '\t')
574 shebang_ptr++;
575 }
576
577 if (strcmp (prog, shebang_ptr) == 0)
578 {
579 state.extension = strdup (prog);
580 codebase_file_handlers[i].handler (&state, file);
581 codebase_scan_state_free (&state);
582 free (shebang);
583 report->files++;
584 return;
585 }
586 }
587
588 free (shebang);
589 free (state.filename);
590 free (state.directory);
591 }
592
593 report->ignored++;
594 }
595
596 static void
597 codebase_report_free (struct codebase_report *report)
598 {
599 free ((void *) report->directory);
600 }
601
602 static bool
603 codebase_report_scan (struct codebase_report *report, const char *directory)
604 {
605 DIR *dirstream = opendir (directory);
606 struct dirent *entry;
607 char **entries = NULL;
608 size_t entries_size = 0;
609
610 if (dirstream == NULL)
611 {
612 report_error ("failed to open directory `%s'", directory);
613 return false;
614 }
615
616 report->directories++;
617
618 while ((entry = readdir (dirstream)) != NULL)
619 {
620 if (strcmp (entry->d_name, ".") == 0
621 || strcmp (entry->d_name, "..") == 0)
622 continue;
623
624 entries = xrealloc (entries, (++entries_size) * sizeof (char *));
625 entries[entries_size - 1] = strdup (entry->d_name);
626 }
627
628 entries = xrealloc (entries, (++entries_size) * sizeof (char *));
629 entries[entries_size - 1] = NULL;
630 closedir (dirstream);
631
632 for (size_t i = 0; i < entries_size - 1; i++)
633 {
634 size_t path_len = 0;
635 char *path = path_join (directory, entries[i], &path_len);
636 struct stat st;
637
638 if (lstat (path, &st) == -1)
639 {
640 report_error ("failed to stat `%s'", path);
641 continue;
642 }
643
644 if (S_ISDIR (st.st_mode))
645 {
646 codebase_report_scan (report, path);
647 }
648 else if (S_ISREG (st.st_mode))
649 {
650 FILE *file = fopen (path, "r");
651
652 if (file == NULL)
653 {
654 report_error ("failed to open file `%s'", path);
655 continue;
656 }
657
658 codebase_report_analyze_file (report, path, file);
659 fclose (file);
660 }
661
662 free (path);
663 }
664
665 for (size_t i = 0; i < entries_size - 1; i++)
666 free (entries[i]);
667
668 free (entries);
669 return true;
670 }
671
672 static bool
673 codebase_report_scan_r (struct codebase_report *report, const char *directory)
674 {
675 report->directory = strdup (directory);
676 return codebase_report_scan (report, directory);
677 }
678
679 static void
680 codebase_report_print (const struct codebase_report *report)
681 {
682 const int widths[] = { 13, 14, 11, 14, 12, 13, 14 };
683 const unsigned long int values[]
684 = { report->files, report->ignored, report->directories,
685 report->lines, report->blank_lines, report->comment_lines,
686 report->code_lines };
687
688 printf ("\033[2m** Report for `%s':\033[0m\n", report->directory);
689
690 /* clang-format off */
691 printf ("+---------------+----------------+-------------+----------------+--------------+---------------+----------------+\n");
692 printf ("| \033[1mFiles\033[0m | \033[1mIgnored Files\033[0m | \033[1mDirectories\033[0m | \033[1mLines\033[0m | \033[1mBlank Lines\033[0m | \033[1mComment Lines\033[0m | \033[1mCode Lines\033[0m |\n");
693 printf ("+---------------+----------------+-------------+----------------+--------------+---------------+----------------+\n|");
694 /* clang-format on */
695
696 for (size_t i = 0; i < sizeof (values) / sizeof (values[0]); i++)
697 {
698 const int width = widths[i];
699 const long unsigned int value = values[i];
700 printf (" \033[1;%sm%-*lu\033[0m |",
701 i < 3 ? "1"
702 : i == 3 ? "34"
703 : i == 4 ? "2"
704 : i == 5 ? "2"
705 : i == 6 ? "32"
706 : "1",
707 width, value);
708 }
709
710 /* clang-format off */
711 printf ("\n+---------------+----------------+-------------+----------------+--------------+---------------+----------------+\n");
712 /* clang-format on */
713 }
714
715 [[noreturn]]
716 static void
717 usage (bool error)
718 {
719 FILE *stream = error ? stderr : stdout;
720 fprintf (stream, "Usage: %s [OPTION]... <DIRECTORY>...\n", prog_name);
721 fputs ("Show statistics for the given codebase.\n", stream);
722 fputc ('\n', stream);
723 fputs (" -h, --help Display this help and exit\n", stream);
724 fputs (" -v, --version Output version information and exit\n", stream);
725 fputc ('\n', stream);
726 fputs ("Bug reports and feedback should be sent to \n<" PACKAGE_BUGREPORT
727 ">.\n",
728 stream);
729 exit (error ? EXIT_FAILURE : EXIT_SUCCESS);
730 }
731
732 static void
733 show_version (void)
734 {
735 printf (PROG_CANONICAL_NAME " (" PACKAGE_FULLNAME ") v" PACKAGE_VERSION
736 "\n");
737 fputc ('\n', stdout);
738 printf ("License GPLv3+: GNU GPL version 3 or later "
739 "<http://gnu.org/licenses/gpl.html>.\n");
740 printf (
741 "This is free software: you are free to change and redistribute it.\n");
742 printf ("There is NO WARRANTY, to the extent permitted by law.\n");
743 fputc ('\n', stdout);
744 printf ("Written by " PROG_AUTHORS ".\n");
745 }
746
747 [[noreturn]]
748 static void
749 invalid_usage (const char *msg)
750 {
751 fprintf (stderr, "%s: %s\n", prog_name, msg);
752 fprintf (stderr, "Try `%s --help' for more information.\n", prog_name);
753 exit (EXIT_FAILURE);
754 }
755
756 int
757 main (int argc, char **argv)
758 {
759 prog_name = argv[0];
760 int opt;
761
762 while ((opt = getopt_long (argc, argv, short_options, long_options, NULL))
763 != -1)
764 {
765 switch (opt)
766 {
767 case 'h':
768 usage (false);
769 case 'v':
770 show_version ();
771 exit (EXIT_SUCCESS);
772 case '?':
773 fprintf (stderr, "Try `%s --help' for more information.\n",
774 prog_name);
775 exit (EXIT_FAILURE);
776 default:
777 abort ();
778 }
779 }
780
781 if (optind == argc)
782 invalid_usage ("missing directory operand");
783
784 bool success = false;
785
786 for (int i = optind; i < argc; i++)
787 {
788 struct codebase_report report = { 0 };
789
790 if (!codebase_report_scan_r (&report, argv[i]))
791 {
792 continue;
793 }
794
795 codebase_report_print (&report);
796 codebase_report_free (&report);
797 success = true;
798 }
799
800 return success ? EXIT_SUCCESS : EXIT_FAILURE;
801 }

[email protected]
ViewVC Help
Powered by ViewVC 1.1.26